From edcb4c60ba0bc416152bdfd931598bfa0df87467 Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Thu, 12 Sep 2024 09:44:37 +0300 Subject: [PATCH 01/92] Change Matcher so that phrases are counted as one instead of word by word --- milli/src/search/new/matches/mod.rs | 45 +++++++++++------------------ 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 4688b8f32..6ddb81c6a 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -132,37 +132,21 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { mut partial: PartialMatch<'a>, token_position: usize, word_position: usize, + first_word_char_start: &usize, words_positions: &mut impl Iterator)>, matches: &mut Vec, ) -> bool { - let mut potential_matches = vec![(token_position, word_position, partial.char_len())]; - - for (token_position, word_position, word) in words_positions { + for (_, _, word) in words_positions { partial = match partial.match_token(word) { // token matches the partial match, but the match is not full, // we temporarily save the current token then we try to match the next one. - Some(MatchType::Partial(partial)) => { - potential_matches.push((token_position, word_position, partial.char_len())); - partial - } + Some(MatchType::Partial(partial)) => partial, // partial match is now full, we keep this matches and we advance positions - Some(MatchType::Full { char_len, ids }) => { - let ids: Vec<_> = ids.clone().collect(); - // save previously matched tokens as matches. - let iter = potential_matches.into_iter().map( - |(token_position, word_position, match_len)| Match { - match_len, - ids: ids.clone(), - word_position, - token_position, - }, - ); - matches.extend(iter); - + Some(MatchType::Full { ids, .. }) => { // save the token that closes the partial match as a match. 
matches.push(Match { - match_len: char_len, - ids, + match_len: word.char_end - first_word_char_start, + ids: ids.clone().collect(), word_position, token_position, }); @@ -221,6 +205,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { partial, token_position, word_position, + &word.char_start, &mut wp, &mut matches, ) { @@ -472,15 +457,17 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { .enumerate() .find(|(i, _)| *i == m.match_len) .map_or(token.byte_end, |(_, (i, _))| i + token.byte_start); + formatted.push(self.highlight_prefix); formatted.push(&self.text[token.byte_start..highlight_byte_index]); formatted.push(self.highlight_suffix); + // if it's a prefix highlight, we put the end of the word after the highlight marker. if highlight_byte_index < token.byte_end { formatted.push(&self.text[highlight_byte_index..token.byte_end]); } - byte_index = token.byte_end; + byte_index = token.byte_start + m.match_len; } } @@ -821,22 +808,24 @@ mod tests { fn format_highlight_crop_phrase_query() { //! testing: https://github.com/meilisearch/meilisearch/issues/3975 let temp_index = TempIndex::new(); + + let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!"; temp_index .add_documents(documents!([ - { "id": 1, "text": "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!" } + { "id": 1, "text": text } ])) .unwrap(); + let rtxn = temp_index.read_txn().unwrap(); let format_options = FormatOptions { highlight: true, crop: Some(10) }; - let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!"; let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); let mut matcher = builder.build(text, None); // should return 10 words with a marker at the start as well the end, and the highlighted matches. 
insta::assert_snapshot!( matcher.format(format_options), - @"…had the power to split the world between those who…" + @"…had the power to split the world between those who…" ); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\""); @@ -844,7 +833,7 @@ mod tests { // should highlight "those" and the phrase "and those". insta::assert_snapshot!( matcher.format(format_options), - @"…world between those who embraced progress and those who resisted…" + @"…world between those who embraced progress and those who resisted…" ); } @@ -900,7 +889,7 @@ mod tests { let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), - @"_the_ _do_ _or_ die can't be he do and or isn'_t_ _he_" + @"_the_ _do or_ die can't be he do and or isn'_t he_" ); } } From e7af499314f24e51f1bff27ff231ceb898aa27a1 Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Thu, 12 Sep 2024 16:58:13 +0300 Subject: [PATCH 02/92] Improve changes to Matcher --- milli/src/search/new/matches/mod.rs | 136 +++++++++++++++++++++------- 1 file changed, 104 insertions(+), 32 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 6ddb81c6a..26dd6f6e8 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -93,15 +93,28 @@ impl FormatOptions { } } +#[derive(Clone, Debug)] +pub enum MatchPosition { + Word { + // position of the word in the whole text. + word_position: usize, + // position of the token in the whole text. + token_position: usize, + }, + Phrase { + // position of the first and last word in the phrase in the whole text. + word_positions: (usize, usize), + // position of the first and last token in the phrase in the whole text. + token_positions: (usize, usize), + }, +} + #[derive(Clone, Debug)] pub struct Match { match_len: usize, // ids of the query words that matches. 
ids: Vec, - // position of the word in the whole text. - word_position: usize, - // position of the token in the whole text. - token_position: usize, + position: MatchPosition, } #[derive(Serialize, Debug, Clone, PartialEq, Eq)] @@ -130,13 +143,13 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { /// compute_partial_match peek into next words to validate if the match is complete. fn compute_partial_match<'a>( mut partial: PartialMatch<'a>, - token_position: usize, - word_position: usize, + first_token_position: usize, + first_word_position: usize, first_word_char_start: &usize, words_positions: &mut impl Iterator)>, matches: &mut Vec, ) -> bool { - for (_, _, word) in words_positions { + for (token_position, word_position, word) in words_positions { partial = match partial.match_token(word) { // token matches the partial match, but the match is not full, // we temporarily save the current token then we try to match the next one. @@ -145,10 +158,12 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { Some(MatchType::Full { ids, .. }) => { // save the token that closes the partial match as a match. matches.push(Match { - match_len: word.char_end - first_word_char_start, + match_len: word.char_end - *first_word_char_start, ids: ids.clone().collect(), - word_position, - token_position, + position: MatchPosition::Phrase { + word_positions: (first_word_position, word_position), + token_positions: (first_token_position, token_position), + }, }); // the match is complete, we return true. 
@@ -191,8 +206,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { matches.push(Match { match_len: char_len, ids, - word_position, - token_position, + position: MatchPosition::Word { word_position, token_position }, }); break; } @@ -228,13 +242,47 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { Some((tokens, matches)) => matches .iter() .map(|m| MatchBounds { - start: tokens[m.token_position].byte_start, + start: tokens[match m.position { + MatchPosition::Word { token_position, .. } => token_position, + MatchPosition::Phrase { + token_positions: (first_token_position, _), + .. + } => first_token_position, + }] + .byte_start, length: m.match_len, }) .collect(), } } + // @TODO: This should be improved, looks nasty + fn get_match_pos(&self, m: &Match, is_first: bool, is_word: bool) -> usize { + match m.position { + MatchPosition::Word { word_position, token_position } => { + if is_word { + word_position + } else { + token_position + } + } + MatchPosition::Phrase { word_positions: (wpf, wpl), token_positions: (tpf, tpl) } => { + if is_word { + if is_first { + return wpf; + } else { + return wpl; + } + } + if is_first { + tpf + } else { + tpl + } + } + } + } + /// Returns the bounds in byte index of the crop window. fn crop_bounds( &self, @@ -243,10 +291,14 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { crop_size: usize, ) -> (usize, usize) { // if there is no match, we start from the beginning of the string by default. 
- let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0); - let first_match_token_position = matches.first().map(|m| m.token_position).unwrap_or(0); - let last_match_word_position = matches.last().map(|m| m.word_position).unwrap_or(0); - let last_match_token_position = matches.last().map(|m| m.token_position).unwrap_or(0); + let first_match_word_position = + matches.first().map(|m| self.get_match_pos(m, true, true)).unwrap_or(0); + let first_match_token_position = + matches.first().map(|m| self.get_match_pos(m, true, false)).unwrap_or(0); + let last_match_word_position = + matches.last().map(|m| self.get_match_pos(m, false, true)).unwrap_or(0); + let last_match_token_position = + matches.last().map(|m| self.get_match_pos(m, false, false)).unwrap_or(0); // matches needs to be counted in the crop len. let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; @@ -350,7 +402,9 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { } // compute distance between matches - distance_score -= (next_match.word_position - m.word_position).min(7) as i16; + distance_score -= (self.get_match_pos(next_match, true, true) + - self.get_match_pos(m, true, true)) + .min(7) as i16; } ids.extend(m.ids.iter()); @@ -378,7 +432,12 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // if next match would make interval gross more than crop_size, // we compare the current interval with the best one, // then we increase `interval_first` until next match can be added. 
- if next_match.word_position - matches[interval_first].word_position >= crop_size { + let next_match_word_position = self.get_match_pos(next_match, true, true); + + if next_match_word_position + - self.get_match_pos(&matches[interval_first], false, true) + >= crop_size + { let interval_score = self.match_interval_score(&matches[interval_first..=interval_last]); @@ -389,10 +448,15 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { } // advance start of the interval while interval is longer than crop_size. - while next_match.word_position - matches[interval_first].word_position - >= crop_size - { + loop { interval_first += 1; + + if next_match_word_position + - self.get_match_pos(&matches[interval_first], false, true) + < crop_size + { + break; + } } } interval_last = index; @@ -441,33 +505,41 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { if format_options.highlight { // insert highlight markers around matches. for m in matches { - let token = &tokens[m.token_position]; + let (current_byte_start, current_byte_end) = match m.position { + MatchPosition::Word { token_position, .. } => { + let token = &tokens[token_position]; + (&token.byte_start, &token.byte_end) + } + MatchPosition::Phrase { token_positions: (ftp, ltp), .. } => { + (&tokens[ftp].byte_start, &tokens[ltp].byte_end) + } + }; // skip matches out of the crop window. - if token.byte_start < byte_start || token.byte_end > byte_end { + if *current_byte_start < byte_start || *current_byte_end > byte_end { continue; } - if byte_index < token.byte_start { - formatted.push(&self.text[byte_index..token.byte_start]); + if byte_index < *current_byte_start { + formatted.push(&self.text[byte_index..*current_byte_start]); } - let highlight_byte_index = self.text[token.byte_start..] + let highlight_byte_index = self.text[*current_byte_start..] 
.char_indices() .enumerate() .find(|(i, _)| *i == m.match_len) - .map_or(token.byte_end, |(_, (i, _))| i + token.byte_start); + .map_or(*current_byte_end, |(_, (i, _))| i + *current_byte_start); formatted.push(self.highlight_prefix); - formatted.push(&self.text[token.byte_start..highlight_byte_index]); + formatted.push(&self.text[*current_byte_start..highlight_byte_index]); formatted.push(self.highlight_suffix); // if it's a prefix highlight, we put the end of the word after the highlight marker. - if highlight_byte_index < token.byte_end { - formatted.push(&self.text[highlight_byte_index..token.byte_end]); + if highlight_byte_index < *current_byte_end { + formatted.push(&self.text[highlight_byte_index..*current_byte_end]); } - byte_index = token.byte_start + m.match_len; + byte_index = *current_byte_end; } } From cc6a2aec06ebd6cb7332afb0478affe3e63185af Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Fri, 13 Sep 2024 13:31:07 +0300 Subject: [PATCH 03/92] Improve changes to Matcher --- milli/src/search/new/matches/mod.rs | 78 +++++++++++++++-------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 26dd6f6e8..a84b25923 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -93,6 +93,16 @@ impl FormatOptions { } } +enum FL { + First, + Last, +} + +enum WT { + Word, + Token, +} + #[derive(Clone, Debug)] pub enum MatchPosition { Word { @@ -256,28 +266,22 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { } } - // @TODO: This should be improved, looks nasty - fn get_match_pos(&self, m: &Match, is_first: bool, is_word: bool) -> usize { + fn get_match_pos(&self, m: &Match, wt: WT, fl: FL) -> usize { match m.position { - MatchPosition::Word { word_position, token_position } => { - if is_word { - word_position - } else { - token_position - } - } - MatchPosition::Phrase { word_positions: 
(wpf, wpl), token_positions: (tpf, tpl) } => { - if is_word { - if is_first { - return wpf; - } else { - return wpl; - } - } - if is_first { - tpf - } else { - tpl + MatchPosition::Word { word_position, token_position } => match wt { + WT::Word => word_position, + WT::Token => token_position, + }, + MatchPosition::Phrase { word_positions: (fwp, lwp), token_positions: (ftp, ltp) } => { + match wt { + WT::Word => match fl { + FL::First => fwp, + FL::Last => lwp, + }, + WT::Token => match fl { + FL::First => ftp, + FL::Last => ltp, + }, } } } @@ -292,13 +296,13 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { ) -> (usize, usize) { // if there is no match, we start from the beginning of the string by default. let first_match_word_position = - matches.first().map(|m| self.get_match_pos(m, true, true)).unwrap_or(0); + matches.first().map(|m| self.get_match_pos(m, WT::Word, FL::First)).unwrap_or(0); let first_match_token_position = - matches.first().map(|m| self.get_match_pos(m, true, false)).unwrap_or(0); + matches.first().map(|m| self.get_match_pos(m, WT::Token, FL::First)).unwrap_or(0); let last_match_word_position = - matches.last().map(|m| self.get_match_pos(m, false, true)).unwrap_or(0); + matches.last().map(|m| self.get_match_pos(m, WT::Word, FL::Last)).unwrap_or(0); let last_match_token_position = - matches.last().map(|m| self.get_match_pos(m, false, false)).unwrap_or(0); + matches.last().map(|m| self.get_match_pos(m, WT::Token, FL::Last)).unwrap_or(0); // matches needs to be counted in the crop len. 
let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; @@ -401,10 +405,12 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { order_score += 1; } + let next_match_first_word_pos = self.get_match_pos(next_match, WT::Word, FL::First); + let current_match_first_word_pos = self.get_match_pos(m, WT::Word, FL::First); + // compute distance between matches - distance_score -= (self.get_match_pos(next_match, true, true) - - self.get_match_pos(m, true, true)) - .min(7) as i16; + distance_score -= + (next_match_first_word_pos - current_match_first_word_pos).min(7) as i16; } ids.extend(m.ids.iter()); @@ -432,12 +438,11 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // if next match would make interval gross more than crop_size, // we compare the current interval with the best one, // then we increase `interval_first` until next match can be added. - let next_match_word_position = self.get_match_pos(next_match, true, true); + let next_match_word_pos = self.get_match_pos(next_match, WT::Word, FL::First); + let mut interval_first_match_word_pos = + self.get_match_pos(&matches[interval_first], WT::Word, FL::Last); - if next_match_word_position - - self.get_match_pos(&matches[interval_first], false, true) - >= crop_size - { + if next_match_word_pos - interval_first_match_word_pos >= crop_size { let interval_score = self.match_interval_score(&matches[interval_first..=interval_last]); @@ -450,11 +455,10 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // advance start of the interval while interval is longer than crop_size. loop { interval_first += 1; + interval_first_match_word_pos = + self.get_match_pos(&matches[interval_first], WT::Word, FL::Last); - if next_match_word_position - - self.get_match_pos(&matches[interval_first], false, true) - < crop_size - { + if next_match_word_pos - interval_first_match_word_pos < crop_size { break; } } From 65e3d61a955dd9b0f4b877d17a0b2b0dc087816c Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Fri, 13 Sep 2024 13:35:58 +0300 Subject: [PATCH 04/92] Make use of helper function in one more place --- milli/src/search/new/matches/mod.rs | 35 ++++++++++++----------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index a84b25923..5a4f0b914 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -245,27 +245,6 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { self } - /// Returns boundaries of the words that match the query. - pub fn matches(&mut self) -> Vec { - match &self.matches { - None => self.compute_matches().matches(), - Some((tokens, matches)) => matches - .iter() - .map(|m| MatchBounds { - start: tokens[match m.position { - MatchPosition::Word { token_position, .. } => token_position, - MatchPosition::Phrase { - token_positions: (first_token_position, _), - .. - } => first_token_position, - }] - .byte_start, - length: m.match_len, - }) - .collect(), - } - } - fn get_match_pos(&self, m: &Match, wt: WT, fl: FL) -> usize { match m.position { MatchPosition::Word { word_position, token_position } => match wt { @@ -287,6 +266,20 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { } } + /// Returns boundaries of the words that match the query. + pub fn matches(&mut self) -> Vec { + match &self.matches { + None => self.compute_matches().matches(), + Some((tokens, matches)) => matches + .iter() + .map(|m| MatchBounds { + start: tokens[self.get_match_pos(m, WT::Token, FL::First)].byte_start, + length: m.match_len, + }) + .collect(), + } + } + /// Returns the bounds in byte index of the crop window. fn crop_bounds( &self, From cab63abc845d87350ab36c07d3999b58eebd0eaa Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Fri, 13 Sep 2024 14:35:28 +0300 Subject: [PATCH 05/92] Improve MatchesPosition enum with an impl --- milli/src/search/new/matches/mod.rs | 81 ++++++++++++++--------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 5a4f0b914..ce878a1eb 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -93,16 +93,6 @@ impl FormatOptions { } } -enum FL { - First, - Last, -} - -enum WT { - Word, - Token, -} - #[derive(Clone, Debug)] pub enum MatchPosition { Word { @@ -127,6 +117,36 @@ pub struct Match { position: MatchPosition, } +impl MatchPosition { + fn get_first_word(m: &Match) -> usize { + match m.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: (fwp, _), .. } => fwp, + } + } + + fn get_last_word(m: &Match) -> usize { + match m.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: (_, lwp), .. } => lwp, + } + } + + fn get_first_token(m: &Match) -> usize { + match m.position { + MatchPosition::Word { token_position, .. } => token_position, + MatchPosition::Phrase { token_positions: (ftp, _), .. } => ftp, + } + } + + fn get_last_token(m: &Match) -> usize { + match m.position { + MatchPosition::Word { token_position, .. } => token_position, + MatchPosition::Phrase { token_positions: (_, ltp), .. 
} => ltp, + } + } +} + #[derive(Serialize, Debug, Clone, PartialEq, Eq)] pub struct MatchBounds { pub start: usize, @@ -245,27 +265,6 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { self } - fn get_match_pos(&self, m: &Match, wt: WT, fl: FL) -> usize { - match m.position { - MatchPosition::Word { word_position, token_position } => match wt { - WT::Word => word_position, - WT::Token => token_position, - }, - MatchPosition::Phrase { word_positions: (fwp, lwp), token_positions: (ftp, ltp) } => { - match wt { - WT::Word => match fl { - FL::First => fwp, - FL::Last => lwp, - }, - WT::Token => match fl { - FL::First => ftp, - FL::Last => ltp, - }, - } - } - } - } - /// Returns boundaries of the words that match the query. pub fn matches(&mut self) -> Vec { match &self.matches { @@ -273,7 +272,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { Some((tokens, matches)) => matches .iter() .map(|m| MatchBounds { - start: tokens[self.get_match_pos(m, WT::Token, FL::First)].byte_start, + start: tokens[MatchPosition::get_first_token(m)].byte_start, length: m.match_len, }) .collect(), @@ -289,13 +288,13 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { ) -> (usize, usize) { // if there is no match, we start from the beginning of the string by default. 
let first_match_word_position = - matches.first().map(|m| self.get_match_pos(m, WT::Word, FL::First)).unwrap_or(0); + matches.first().map(|m| MatchPosition::get_first_word(m)).unwrap_or(0); let first_match_token_position = - matches.first().map(|m| self.get_match_pos(m, WT::Token, FL::First)).unwrap_or(0); + matches.first().map(|m| MatchPosition::get_first_token(m)).unwrap_or(0); let last_match_word_position = - matches.last().map(|m| self.get_match_pos(m, WT::Word, FL::Last)).unwrap_or(0); + matches.last().map(|m| MatchPosition::get_last_word(m)).unwrap_or(0); let last_match_token_position = - matches.last().map(|m| self.get_match_pos(m, WT::Token, FL::Last)).unwrap_or(0); + matches.last().map(|m| MatchPosition::get_last_token(m)).unwrap_or(0); // matches needs to be counted in the crop len. let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; @@ -398,8 +397,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { order_score += 1; } - let next_match_first_word_pos = self.get_match_pos(next_match, WT::Word, FL::First); - let current_match_first_word_pos = self.get_match_pos(m, WT::Word, FL::First); + let next_match_first_word_pos = MatchPosition::get_first_word(next_match); + let current_match_first_word_pos = MatchPosition::get_first_word(m); // compute distance between matches distance_score -= @@ -431,9 +430,9 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // if next match would make interval gross more than crop_size, // we compare the current interval with the best one, // then we increase `interval_first` until next match can be added. 
- let next_match_word_pos = self.get_match_pos(next_match, WT::Word, FL::First); + let next_match_word_pos = MatchPosition::get_first_word(next_match); let mut interval_first_match_word_pos = - self.get_match_pos(&matches[interval_first], WT::Word, FL::Last); + MatchPosition::get_last_word(&matches[interval_first]); if next_match_word_pos - interval_first_match_word_pos >= crop_size { let interval_score = @@ -449,7 +448,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { loop { interval_first += 1; interval_first_match_word_pos = - self.get_match_pos(&matches[interval_first], WT::Word, FL::Last); + MatchPosition::get_last_word(&matches[interval_first]); if next_match_word_pos - interval_first_match_word_pos < crop_size { break; From a2a16bf846066f422a5e6bd9bcb0009a894dcad0 Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Fri, 13 Sep 2024 21:20:06 +0300 Subject: [PATCH 06/92] Move MatchPosition impl to Match, adjust counting score for phrases --- milli/src/search/new/matches/mod.rs | 66 +++++++++++++++++++---------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index ce878a1eb..e63920145 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -117,30 +117,30 @@ pub struct Match { position: MatchPosition, } -impl MatchPosition { - fn get_first_word(m: &Match) -> usize { - match m.position { +impl Match { + fn get_first_word_pos(&self) -> usize { + match self.position { MatchPosition::Word { word_position, .. } => word_position, MatchPosition::Phrase { word_positions: (fwp, _), .. } => fwp, } } - fn get_last_word(m: &Match) -> usize { - match m.position { + fn get_last_word_pos(&self) -> usize { + match self.position { MatchPosition::Word { word_position, .. } => word_position, MatchPosition::Phrase { word_positions: (_, lwp), .. 
} => lwp, } } - fn get_first_token(m: &Match) -> usize { - match m.position { + fn get_first_token_pos(&self) -> usize { + match self.position { MatchPosition::Word { token_position, .. } => token_position, MatchPosition::Phrase { token_positions: (ftp, _), .. } => ftp, } } - fn get_last_token(m: &Match) -> usize { - match m.position { + fn get_last_token_pos(&self) -> usize { + match self.position { MatchPosition::Word { token_position, .. } => token_position, MatchPosition::Phrase { token_positions: (_, ltp), .. } => ltp, } @@ -272,7 +272,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { Some((tokens, matches)) => matches .iter() .map(|m| MatchBounds { - start: tokens[MatchPosition::get_first_token(m)].byte_start, + start: tokens[m.get_first_token_pos()].byte_start, length: m.match_len, }) .collect(), @@ -288,13 +288,11 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { ) -> (usize, usize) { // if there is no match, we start from the beginning of the string by default. let first_match_word_position = - matches.first().map(|m| MatchPosition::get_first_word(m)).unwrap_or(0); + matches.first().map(|m| m.get_first_word_pos()).unwrap_or(0); let first_match_token_position = - matches.first().map(|m| MatchPosition::get_first_token(m)).unwrap_or(0); - let last_match_word_position = - matches.last().map(|m| MatchPosition::get_last_word(m)).unwrap_or(0); - let last_match_token_position = - matches.last().map(|m| MatchPosition::get_last_token(m)).unwrap_or(0); + matches.first().map(|m| m.get_first_token_pos()).unwrap_or(0); + let last_match_word_position = matches.last().map(|m| m.get_last_word_pos()).unwrap_or(0); + let last_match_token_position = matches.last().map(|m| m.get_last_token_pos()).unwrap_or(0); // matches needs to be counted in the crop len. 
let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; @@ -389,6 +387,16 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { let mut order_score = 0; let mut distance_score = 0; + // Count score for phrases + let tally_phrase_scores = + |fwp: &usize, lwp: &usize, order_score: &mut i16, distance_score: &mut i16| { + let words_in_phrase_minus_one = (lwp - fwp) as i16; + // will always be ordered, so +1 for each space between words + *order_score += words_in_phrase_minus_one; + // distance will always be 1, so -1 for each space between words + *distance_score -= words_in_phrase_minus_one; + }; + let mut iter = matches.iter().peekable(); while let Some(m) = iter.next() { if let Some(next_match) = iter.peek() { @@ -397,12 +405,24 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { order_score += 1; } - let next_match_first_word_pos = MatchPosition::get_first_word(next_match); - let current_match_first_word_pos = MatchPosition::get_first_word(m); + let m_last_word_pos = match m.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: (fwp, lwp), .. } => { + tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); + lwp + } + }; + + let next_match_first_word_pos = match next_match.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: (fwp, _), .. } => fwp, + }; // compute distance between matches - distance_score -= - (next_match_first_word_pos - current_match_first_word_pos).min(7) as i16; + distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16; + } else if let MatchPosition::Phrase { word_positions: (fwp, lwp), .. 
} = m.position { + // in case last match is a phrase, count score for its words + tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); } ids.extend(m.ids.iter()); @@ -430,9 +450,9 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // if next match would make interval gross more than crop_size, // we compare the current interval with the best one, // then we increase `interval_first` until next match can be added. - let next_match_word_pos = MatchPosition::get_first_word(next_match); + let next_match_word_pos = next_match.get_last_word_pos(); let mut interval_first_match_word_pos = - MatchPosition::get_last_word(&matches[interval_first]); + matches[interval_first].get_first_word_pos(); if next_match_word_pos - interval_first_match_word_pos >= crop_size { let interval_score = @@ -448,7 +468,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { loop { interval_first += 1; interval_first_match_word_pos = - MatchPosition::get_last_word(&matches[interval_first]); + matches[interval_first].get_first_word_pos(); if next_match_word_pos - interval_first_match_word_pos < crop_size { break; From 51085206ccab6e8e0098c4cf8b2a3e67e06558a4 Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Sat, 14 Sep 2024 10:14:07 +0300 Subject: [PATCH 07/92] Misc adjustments --- milli/src/search/new/matches/mod.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index e63920145..414509cd3 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -387,7 +387,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { let mut order_score = 0; let mut distance_score = 0; - // Count score for phrases + // count score for phrases let tally_phrase_scores = |fwp: &usize, lwp: &usize, order_score: &mut i16, distance_score: &mut i16| { let words_in_phrase_minus_one = (lwp - fwp) as i16; @@ -450,11 +450,11 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // if next match would make interval gross more than crop_size, // we compare the current interval with the best one, // then we increase `interval_first` until next match can be added. - let next_match_word_pos = next_match.get_last_word_pos(); - let mut interval_first_match_word_pos = + let next_match_last_word_pos = next_match.get_last_word_pos(); + let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); - if next_match_word_pos - interval_first_match_word_pos >= crop_size { + if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size { let interval_score = self.match_interval_score(&matches[interval_first..=interval_last]); @@ -467,10 +467,12 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // advance start of the interval while interval is longer than crop_size. 
loop { interval_first += 1; - interval_first_match_word_pos = + interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); - if next_match_word_pos - interval_first_match_word_pos < crop_size { + if next_match_last_word_pos - interval_first_match_first_word_pos + < crop_size + { break; } } From 993408d3ba65cbcea9920caeab8b421160a931ac Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Sun, 15 Sep 2024 16:15:09 +0300 Subject: [PATCH 08/92] Change closure to fn --- milli/src/search/new/matches/mod.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 414509cd3..df110aff9 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -388,14 +388,18 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { let mut distance_score = 0; // count score for phrases - let tally_phrase_scores = - |fwp: &usize, lwp: &usize, order_score: &mut i16, distance_score: &mut i16| { - let words_in_phrase_minus_one = (lwp - fwp) as i16; - // will always be ordered, so +1 for each space between words - *order_score += words_in_phrase_minus_one; - // distance will always be 1, so -1 for each space between words - *distance_score -= words_in_phrase_minus_one; - }; + fn tally_phrase_scores( + fwp: &usize, + lwp: &usize, + order_score: &mut i16, + distance_score: &mut i16, + ) { + let words_in_phrase_minus_one = (lwp - fwp) as i16; + // will always be ordered, so +1 for each space between words + *order_score += words_in_phrase_minus_one; + // distance will always be 1, so -1 for each space between words + *distance_score -= words_in_phrase_minus_one; + } let mut iter = matches.iter().peekable(); while let Some(m) = iter.next() { From f7337affd6342ae495d99312862b300e7af461e0 Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Tue, 17 Sep 2024 17:31:09 +0300 Subject: [PATCH 09/92] Adjust tests to changes --- meilisearch/tests/search/locales.rs | 44 ++++++++++++++--------------- milli/src/search/new/matches/mod.rs | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index dbc4fcc30..b9e70c5b1 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -400,9 +400,9 @@ async fn force_locales() { ] }, "_formatted": { - "name_zh": "巨人", + "name_zh": "进击的巨人", "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", "id": "853", "_vectors": { "manual": [ @@ -447,9 +447,9 @@ async fn force_locales() { ] }, "_formatted": { - "name_zh": "巨人", + "name_zh": "进击的巨人", "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", "id": "853", "_vectors": { "manual": [ @@ -524,9 +524,9 @@ async fn force_locales_with_pattern() { ] }, "_formatted": { - "name_zh": "巨人", + "name_zh": "进击的巨人", "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", "id": "853", "_vectors": { "manual": [ @@ -571,9 +571,9 @@ async fn force_locales_with_pattern() { ] }, "_formatted": { - "name_zh": "巨人", + "name_zh": "进击的巨人", "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", "id": "853", "_vectors": { "manual": [ @@ -689,8 +689,8 @@ async fn force_locales_with_pattern_nested() { "author": "諫山 創" }, "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", + "name": "进击的巨人", + "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", "author": "諫山創" }, "id": "852", @@ -788,9 +788,9 @@ async fn force_different_locales_with_pattern() { ] }, "_formatted": { - "name_zh": "巨人", + "name_zh": "进击的巨人", "author_zh": "諫山創", - "description_zh": 
"巨人是日本的漫画系列,由諫山 創作画。", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", "id": "853", "_vectors": { "manual": [ @@ -889,9 +889,9 @@ async fn auto_infer_locales_at_search_with_attributes_to_search_on() { ] }, "_formatted": { - "name_zh": "巨人", + "name_zh": "进击的巨人", "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", "id": "853", "_vectors": { "manual": [ @@ -965,9 +965,9 @@ async fn auto_infer_locales_at_search() { ] }, "_formatted": { - "name_zh": "巨人", + "name_zh": "进击的巨人", "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", "id": "853", "_vectors": { "manual": [ @@ -1011,9 +1011,9 @@ async fn auto_infer_locales_at_search() { ] }, "_formatted": { - "name_zh": "巨人", + "name_zh": "进击的巨人", "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", "id": "853", "_vectors": { "manual": [ @@ -1057,9 +1057,9 @@ async fn auto_infer_locales_at_search() { ] }, "_formatted": { - "name_zh": "巨人", + "name_zh": "进击的巨人", "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", "id": "853", "_vectors": { "manual": [ @@ -1177,8 +1177,8 @@ async fn force_different_locales_with_pattern_nested() { "author": "諫山 創" }, "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", + "name": "进击的巨人", + "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", "author": "諫山創" }, "id": "852", diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index df110aff9..09d3db575 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -919,7 +919,7 @@ mod tests { // should return 10 words with a marker at the start as well the end, and the highlighted matches. 
insta::assert_snapshot!( matcher.format(format_options), - @"…had the power to split the world between those who…" + @"…the power to split the world between those who embraced…" ); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\""); From 83113998f99bb6d59bb9e94e9ef3e527f4c93f62 Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Wed, 18 Sep 2024 10:35:23 +0300 Subject: [PATCH 10/92] Add more test assertions --- milli/src/search/new/matches/mod.rs | 36 +++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 09d3db575..8a84f91bd 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -929,6 +929,42 @@ mod tests { matcher.format(format_options), @"…world between those who embraced progress and those who resisted…" ); + + let builder = MatcherBuilder::new_test( + &rtxn, + &temp_index, + "\"The groundbreaking invention had the power to split the world\"", + ); + let mut matcher = builder.build(text, None); + // should highlight "those" and the phrase "and those". + insta::assert_snapshot!( + matcher.format(format_options), + @"The groundbreaking invention had the power to split the world…" + ); + + let builder = MatcherBuilder::new_test( + &rtxn, + &temp_index, + "\"The groundbreaking invention had the power to split the world between\"", + ); + let mut matcher = builder.build(text, None); + // should highlight "those" and the phrase "and those". + insta::assert_snapshot!( + matcher.format(format_options), + @"The groundbreaking invention had the power to split the world …" + ); + + let builder = MatcherBuilder::new_test( + &rtxn, + &temp_index, + "\"The groundbreaking invention\" \"embraced progress and those who resisted change\"", + ); + let mut matcher = builder.build(text, None); + // should highlight "those" and the phrase "and those". 
+ insta::assert_snapshot!( + matcher.format(format_options), + @"…between those who embraced progress and those who resisted change…" + ); } #[test] From 0ffeea5a5209f1e206720e3cf63d7fe627b8cee0 Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Thu, 19 Sep 2024 09:06:40 +0300 Subject: [PATCH 11/92] Remove wrong comments --- milli/src/search/new/matches/mod.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 8a84f91bd..26115c39b 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -936,7 +936,6 @@ mod tests { "\"The groundbreaking invention had the power to split the world\"", ); let mut matcher = builder.build(text, None); - // should highlight "those" and the phrase "and those". insta::assert_snapshot!( matcher.format(format_options), @"The groundbreaking invention had the power to split the world…" @@ -948,7 +947,6 @@ mod tests { "\"The groundbreaking invention had the power to split the world between\"", ); let mut matcher = builder.build(text, None); - // should highlight "those" and the phrase "and those". insta::assert_snapshot!( matcher.format(format_options), @"The groundbreaking invention had the power to split the world …" @@ -960,7 +958,6 @@ mod tests { "\"The groundbreaking invention\" \"embraced progress and those who resisted change\"", ); let mut matcher = builder.build(text, None); - // should highlight "those" and the phrase "and those". 
insta::assert_snapshot!( matcher.format(format_options), @"…between those who embraced progress and those who resisted change…" From afa3ae0cbd9c7223d4068dd438d043a43d0d4fae Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 17:42:52 +0200 Subject: [PATCH 12/92] WIP --- milli/src/update/index_documents/mod.rs | 17 ++----- .../src/update/index_documents/typed_chunk.rs | 16 ++---- milli/src/vector/mod.rs | 51 +++++++++++-------- 3 files changed, 38 insertions(+), 46 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 326dd842d..b03ab259a 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -689,9 +689,8 @@ where key: None, }, )?; - let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap(); let reader = - ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized); + ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized); let dim = reader.dimensions(self.wtxn)?; dimension.insert(name.to_string(), dim); } @@ -713,17 +712,11 @@ where let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized); pool.install(|| { - for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { - let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized); - if is_quantizing { - writer.quantize(wtxn, k, dimension)?; - } - if writer.need_build(wtxn, dimension)? { - writer.build(wtxn, &mut rng, dimension)?; - } else if writer.is_empty(wtxn, dimension)? 
{ - break; - } + let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized); + if is_quantizing { + writer.quantize(wtxn, dimension)?; } + writer.build(wtxn, &mut rng, dimension)?; Result::Ok(()) }) .map_err(InternalError::from)??; diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 97a4bf712..e340137e2 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -673,22 +673,14 @@ pub(crate) fn write_typed_chunk_into_index( .get(&embedder_name) .map_or(false, |conf| conf.2); // FIXME: allow customizing distance - let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) - .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized)) - .collect(); + let writer = ArroyWrapper::new(index.vector_arroy, embedder_index, binary_quantized); // remove vectors for docids we want them removed let merger = remove_vectors_builder.build(); let mut iter = merger.into_stream_merger_iter()?; while let Some((key, _)) = iter.next()? { let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap(); - - for writer in &writers { - // Uses invariant: vectors are packed in the first writers. - if !writer.del_item(wtxn, expected_dimension, docid)? 
{ - break; - } - } + writer.del_item(wtxn, expected_dimension, docid)?; } // add generated embeddings @@ -716,9 +708,7 @@ pub(crate) fn write_typed_chunk_into_index( embeddings.embedding_count(), ))); } - for (embedding, writer) in embeddings.iter().zip(&writers) { - writer.add_item(wtxn, expected_dimension, docid, embedding)?; - } + writer.add_items(wtxn, expected_dimension, docid, embeddings)?; } // perform the manual diff diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index d52e68bbe..644826dcd 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -32,60 +32,69 @@ pub const REQUEST_PARALLELISM: usize = 40; pub struct ArroyWrapper { quantized: bool, - index: u16, + index: u8, database: arroy::Database, } impl ArroyWrapper { - pub fn new(database: arroy::Database, index: u16, quantized: bool) -> Self { + pub fn new(database: arroy::Database, index: u8, quantized: bool) -> Self { Self { database, index, quantized } } - pub fn index(&self) -> u16 { + pub fn index(&self) -> u8 { self.index } pub fn dimensions(&self, rtxn: &RoTxn) -> Result { + let first_id = arroy_db_range_for_embedder(self.index).next().unwrap(); if self.quantized { - Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions()) + Ok(arroy::Reader::open(rtxn, first_id, self.quantized_db())?.dimensions()) } else { - Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions()) + Ok(arroy::Reader::open(rtxn, first_id, self.angular_db())?.dimensions()) } } - pub fn quantize( - &mut self, - wtxn: &mut RwTxn, - index: u16, - dimension: usize, - ) -> Result<(), arroy::Error> { + pub fn quantize(&mut self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { if !self.quantized { - let writer = arroy::Writer::new(self.angular_db(), index, dimension); - writer.prepare_changing_distance::(wtxn)?; + for index in arroy_db_range_for_embedder(self.index) { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + 
writer.prepare_changing_distance::(wtxn)?; + } self.quantized = true; } Ok(()) } + // TODO: We can stop early when we find an empty DB pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn) + for index in arroy_db_range_for_embedder(self.index) { + let need_build = if self.quantized { + arroy::Writer::new(self.quantized_db(), index, dimension).need_build(rtxn) + } else { + arroy::Writer::new(self.angular_db(), index, dimension).need_build(rtxn) + }; + if need_build? { + return Ok(true); + } } + Ok(false) } + /// TODO: We should early exit when it doesn't need to be built pub fn build( &self, wtxn: &mut RwTxn, rng: &mut R, dimension: usize, ) -> Result<(), arroy::Error> { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None) + for index in arroy_db_range_for_embedder(self.index) { + if self.quantized { + arroy::Writer::new(self.quantized_db(), index, dimension).build(wtxn, rng, None)? + } else { + arroy::Writer::new(self.angular_db(), index, dimension).build(wtxn, rng, None)? 
+ } } + Ok(()) } pub fn add_item( From 6ba4baecbf47e39339c22c67b60a5d0953f53fc5 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 23 Sep 2024 15:15:26 +0200 Subject: [PATCH 13/92] first ugly step --- milli/src/search/similar.rs | 26 +- .../src/update/index_documents/typed_chunk.rs | 45 +--- milli/src/vector/mod.rs | 232 ++++++++++++++---- 3 files changed, 203 insertions(+), 100 deletions(-) diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index 0cb8d723d..e408c94b1 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -4,7 +4,7 @@ use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use crate::score_details::{self, ScoreDetails}; -use crate::vector::Embedder; +use crate::vector::{ArroyWrapper, Embedder}; use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult}; pub struct Similar<'a> { @@ -71,23 +71,13 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? .ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let mut results = Vec::new(); - - for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) { - let nns_by_item = reader?.nns_by_item( - self.rtxn, - self.id, - self.limit + self.offset + 1, - Some(&universe), - )?; - if let Some(mut nns_by_item) = nns_by_item { - results.append(&mut nns_by_item); - } else { - break; - } - } - - results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); + let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_index, self.quantized); + let results = reader.nns_by_item( + self.rtxn, + self.id, + self.limit + self.offset + 1, + Some(&universe), + )?; let mut documents_ids = Vec::with_capacity(self.limit); let mut document_scores = Vec::with_capacity(self.limit); diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index e340137e2..e118420d8 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ 
b/milli/src/update/index_documents/typed_chunk.rs @@ -680,7 +680,7 @@ pub(crate) fn write_typed_chunk_into_index( let mut iter = merger.into_stream_merger_iter()?; while let Some((key, _)) = iter.next()? { let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap(); - writer.del_item(wtxn, expected_dimension, docid)?; + writer.del_item_raw(wtxn, expected_dimension, docid)?; } // add generated embeddings @@ -708,7 +708,7 @@ pub(crate) fn write_typed_chunk_into_index( embeddings.embedding_count(), ))); } - writer.add_items(wtxn, expected_dimension, docid, embeddings)?; + writer.add_items(wtxn, docid, &embeddings)?; } // perform the manual diff @@ -723,51 +723,14 @@ pub(crate) fn write_typed_chunk_into_index( if let Some(value) = vector_deladd_obkv.get(DelAdd::Deletion) { let vector: Vec = pod_collect_to_vec(value); - let mut deleted_index = None; - for (index, writer) in writers.iter().enumerate() { - let Some(candidate) = writer.item_vector(wtxn, docid)? else { - // uses invariant: vectors are packed in the first writers. - break; - }; - if candidate == vector { - writer.del_item(wtxn, expected_dimension, docid)?; - deleted_index = Some(index); - } - } - - // 🥲 enforce invariant: vectors are packed in the first writers. - if let Some(deleted_index) = deleted_index { - let mut last_index_with_a_vector = None; - for (index, writer) in writers.iter().enumerate().skip(deleted_index) { - let Some(candidate) = writer.item_vector(wtxn, docid)? 
else { - break; - }; - last_index_with_a_vector = Some((index, candidate)); - } - if let Some((last_index, vector)) = last_index_with_a_vector { - // unwrap: computed the index from the list of writers - let writer = writers.get(last_index).unwrap(); - writer.del_item(wtxn, expected_dimension, docid)?; - writers.get(deleted_index).unwrap().add_item( - wtxn, - expected_dimension, - docid, - &vector, - )?; - } - } + writer.del_item(wtxn, docid, &vector)?; } if let Some(value) = vector_deladd_obkv.get(DelAdd::Addition) { let vector = pod_collect_to_vec(value); // overflow was detected during vector extraction. - for writer in &writers { - if !writer.contains_item(wtxn, expected_dimension, docid)? { - writer.add_item(wtxn, expected_dimension, docid, &vector)?; - break; - } - } + writer.add_item(wtxn, docid, &vector)?; } } diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 644826dcd..54765cfef 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -97,49 +97,165 @@ impl ArroyWrapper { Ok(()) } + pub fn add_items( + &self, + wtxn: &mut RwTxn, + item_id: arroy::ItemId, + embeddings: &Embeddings, + ) -> Result<(), arroy::Error> { + let dimension = embeddings.dimension(); + for (index, vector) in arroy_db_range_for_embedder(self.index).zip(embeddings.iter()) { + if self.quantized { + arroy::Writer::new(self.quantized_db(), index, dimension) + .add_item(wtxn, item_id, vector)? + } else { + arroy::Writer::new(self.angular_db(), index, dimension) + .add_item(wtxn, item_id, vector)? 
+ } + } + Ok(()) + } + pub fn add_item( &self, wtxn: &mut RwTxn, - dimension: usize, item_id: arroy::ItemId, vector: &[f32], ) -> Result<(), arroy::Error> { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension) - .add_item(wtxn, item_id, vector) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension) - .add_item(wtxn, item_id, vector) + let dimension = vector.len(); + + for index in arroy_db_range_for_embedder(self.index) { + if self.quantized { + let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + if !writer.contains_item(wtxn, item_id)? { + writer.add_item(wtxn, item_id, &vector)?; + break; + } + } else { + arroy::Writer::new(self.angular_db(), index, dimension) + .add_item(wtxn, item_id, vector)? + } } + + Ok(()) } - pub fn del_item( + pub fn del_item_raw( &self, wtxn: &mut RwTxn, dimension: usize, item_id: arroy::ItemId, ) -> Result { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id) + for index in arroy_db_range_for_embedder(self.index) { + if self.quantized { + let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + if writer.del_item(wtxn, item_id)? { + return Ok(true); + } + } else { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + if writer.del_item(wtxn, item_id)? { + return Ok(true); + } + } } + + Ok(false) + } + + pub fn del_item( + &self, + wtxn: &mut RwTxn, + itemid: arroy::ItemId, + vector: &[f32], + ) -> Result { + let dimension = vector.len(); + let mut deleted_index = None; + + for index in arroy_db_range_for_embedder(self.index) { + if self.quantized { + let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + let Some(candidate) = writer.item_vector(wtxn, itemid)? else { + // uses invariant: vectors are packed in the first writers. 
+ break; + }; + if candidate == vector { + writer.del_item(wtxn, itemid)?; + deleted_index = Some(index); + } + } else { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + let Some(candidate) = writer.item_vector(wtxn, itemid)? else { + // uses invariant: vectors are packed in the first writers. + break; + }; + if candidate == vector { + writer.del_item(wtxn, itemid)?; + deleted_index = Some(index); + } + } + } + + // 🥲 enforce invariant: vectors are packed in the first writers. + if let Some(deleted_index) = deleted_index { + let mut last_index_with_a_vector = None; + for index in arroy_db_range_for_embedder(self.index).skip(deleted_index as usize) { + if self.quantized { + let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + let Some(candidate) = writer.item_vector(wtxn, itemid)? else { + break; + }; + last_index_with_a_vector = Some((index, candidate)); + } else { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + let Some(candidate) = writer.item_vector(wtxn, itemid)? 
else { + break; + }; + last_index_with_a_vector = Some((index, candidate)); + } + } + if let Some((last_index, vector)) = last_index_with_a_vector { + if self.quantized { + // unwrap: computed the index from the list of writers + let writer = arroy::Writer::new(self.quantized_db(), last_index, dimension); + writer.del_item(wtxn, itemid)?; + let writer = arroy::Writer::new(self.quantized_db(), deleted_index, dimension); + writer.add_item(wtxn, itemid, &vector)?; + } else { + // unwrap: computed the index from the list of writers + let writer = arroy::Writer::new(self.angular_db(), last_index, dimension); + writer.del_item(wtxn, itemid)?; + let writer = arroy::Writer::new(self.angular_db(), deleted_index, dimension); + writer.add_item(wtxn, itemid, &vector)?; + } + } + } + Ok(deleted_index.is_some()) } pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn) + for index in arroy_db_range_for_embedder(self.index) { + if self.quantized { + arroy::Writer::new(self.quantized_db(), index, dimension).clear(wtxn)?; + } else { + arroy::Writer::new(self.angular_db(), index, dimension).clear(wtxn)?; + } } + Ok(()) } pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn) + for index in arroy_db_range_for_embedder(self.index) { + let empty = if self.quantized { + arroy::Writer::new(self.quantized_db(), index, dimension).is_empty(rtxn)? + } else { + arroy::Writer::new(self.angular_db(), index, dimension).is_empty(rtxn)? 
+ }; + if !empty { + return Ok(false); + } } + Ok(true) } pub fn contains_item( @@ -148,11 +264,18 @@ impl ArroyWrapper { dimension: usize, item: arroy::ItemId, ) -> Result { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, item) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item) + for index in arroy_db_range_for_embedder(self.index) { + let contains = if self.quantized { + arroy::Writer::new(self.quantized_db(), index, dimension) + .contains_item(rtxn, item)? + } else { + arroy::Writer::new(self.angular_db(), index, dimension).contains_item(rtxn, item)? + }; + if contains { + return Ok(contains); + } } + Ok(false) } pub fn nns_by_item( @@ -161,14 +284,26 @@ impl ArroyWrapper { item: ItemId, limit: usize, filter: Option<&RoaringBitmap>, - ) -> Result>, arroy::Error> { - if self.quantized { - arroy::Reader::open(rtxn, self.index, self.quantized_db())? - .nns_by_item(rtxn, item, limit, None, None, filter) - } else { - arroy::Reader::open(rtxn, self.index, self.angular_db())? - .nns_by_item(rtxn, item, limit, None, None, filter) + ) -> Result, arroy::Error> { + let mut results = Vec::new(); + + for index in arroy_db_range_for_embedder(self.index) { + let ret = if self.quantized { + arroy::Reader::open(rtxn, index, self.quantized_db())? + .nns_by_item(rtxn, item, limit, None, None, filter)? + } else { + arroy::Reader::open(rtxn, index, self.angular_db())? + .nns_by_item(rtxn, item, limit, None, None, filter)? + }; + if let Some(mut ret) = ret { + results.append(&mut ret); + } else { + break; + } } + results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); + + Ok(results) } pub fn nns_by_vector( @@ -178,21 +313,36 @@ impl ArroyWrapper { limit: usize, filter: Option<&RoaringBitmap>, ) -> Result, arroy::Error> { - if self.quantized { - arroy::Reader::open(txn, self.index, self.quantized_db())? 
- .nns_by_vector(txn, item, limit, None, None, filter) - } else { - arroy::Reader::open(txn, self.index, self.angular_db())? - .nns_by_vector(txn, item, limit, None, None, filter) + let mut results = Vec::new(); + + for index in arroy_db_range_for_embedder(self.index) { + let mut ret = if self.quantized { + arroy::Reader::open(txn, index, self.quantized_db())? + .nns_by_vector(txn, item, limit, None, None, filter)? + } else { + arroy::Reader::open(txn, index, self.angular_db())? + .nns_by_vector(txn, item, limit, None, None, filter)? + }; + results.append(&mut ret); } + + results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); + + Ok(results) } pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result>, arroy::Error> { - if self.quantized { - arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid) - } else { - arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid) + for index in arroy_db_range_for_embedder(self.index) { + let ret = if self.quantized { + arroy::Reader::open(rtxn, index, self.quantized_db())?.item_vector(rtxn, docid)? + } else { + arroy::Reader::open(rtxn, index, self.angular_db())?.item_vector(rtxn, docid)? 
+ }; + if ret.is_some() { + return Ok(ret); + } } + Ok(None) } fn angular_db(&self) -> arroy::Database { From 1e4d4e69c4cebee8f09d905c5cc8130b08214f04 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 23 Sep 2024 18:56:15 +0200 Subject: [PATCH 14/92] finish the arroywrapper --- milli/src/index.rs | 29 +-- milli/src/search/new/vector_sort.rs | 12 +- milli/src/search/similar.rs | 1 - milli/src/update/index_documents/transform.rs | 63 ++---- milli/src/vector/mod.rs | 211 +++++++++++------- 5 files changed, 155 insertions(+), 161 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index c47896df7..5b7a9c58c 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1610,24 +1610,6 @@ impl Index { .unwrap_or_default()) } - pub fn arroy_readers<'a>( - &'a self, - rtxn: &'a RoTxn<'a>, - embedder_id: u8, - quantized: bool, - ) -> impl Iterator> + 'a { - crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { - let reader = ArroyWrapper::new(self.vector_arroy, k, quantized); - // Here we don't care about the dimensions, but we want to know if we can read - // in the database or if its metadata are missing because there is no document with that many vectors. 
- match reader.dimensions(rtxn) { - Ok(_) => Some(Ok(reader)), - Err(arroy::Error::MissingMetadata(_)) => None, - Err(e) => Some(Err(e.into())), - } - }) - } - pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { self.main.remap_types::().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff) } @@ -1649,14 +1631,9 @@ impl Index { let embedding_configs = self.embedding_configs(rtxn)?; for config in embedding_configs { let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); - let embeddings = self - .arroy_readers(rtxn, embedder_id, config.config.quantized()) - .map_while(|reader| { - reader - .and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into())) - .transpose() - }) - .collect::>>()?; + let reader = + ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized()); + let embeddings = reader.item_vectors(rtxn, docid)?; res.insert(config.name.to_owned(), embeddings); } Ok(res) diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index de1dacbe7..90377c09c 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -1,11 +1,10 @@ use std::iter::FromIterator; -use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; use crate::score_details::{self, ScoreDetails}; -use crate::vector::{DistributionShift, Embedder}; +use crate::vector::{ArroyWrapper, DistributionShift, Embedder}; use crate::{DocumentId, Result, SearchContext, SearchLogger}; pub struct VectorSort { @@ -53,14 +52,9 @@ impl VectorSort { vector_candidates: &RoaringBitmap, ) -> Result<()> { let target = &self.target; - let mut results = Vec::new(); - for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) { - let nns_by_vector = - reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; - results.extend(nns_by_vector.into_iter()); - } 
- results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); + let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized); + let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; self.cached_sorted_docids = results.into_iter(); Ok(()) diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index e408c94b1..5547d800e 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -1,6 +1,5 @@ use std::sync::Arc; -use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use crate::score_details::{self, ScoreDetails}; diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index bb2cfe56c..763f30d0f 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -990,27 +990,24 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: Result, &RoaringBitmap)>> = settings_diff + let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { if let Some(WriteBackToDocuments { embedder_id, user_provided }) = action.write_back() { - let readers: Result> = self - .index - .arroy_readers(wtxn, *embedder_id, action.was_quantized) - .collect(); - match readers { - Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), - Err(error) => Some(Err(error)), - } + let reader = ArroyWrapper::new( + self.index.vector_arroy, + *embedder_id, + action.was_quantized, + ); + Some((name.as_str(), (reader, user_provided))) } else { None } }) .collect(); - let readers = readers?; let old_vectors_fid = settings_diff .old @@ -1048,34 +1045,24 @@ impl<'a, 'i> Transform<'a, 'i> { arroy::Error, > = readers .iter() - .filter_map(|(name, (readers, user_provided))| { + .filter_map(|(name, (reader, user_provided))| { if !user_provided.contains(docid) { return None; } - let mut vectors = Vec::new(); 
- for reader in readers { - let Some(vector) = reader.item_vector(wtxn, docid).transpose() else { - break; - }; - - match vector { - Ok(vector) => vectors.push(vector), - Err(error) => return Some(Err(error)), - } + match reader.item_vectors(wtxn, docid) { + Ok(vectors) if vectors.is_empty() => None, + Ok(vectors) => Some(Ok(( + name.to_string(), + serde_json::to_value(ExplicitVectors { + embeddings: Some( + VectorOrArrayOfVectors::from_array_of_vectors(vectors), + ), + regenerate: false, + }) + .unwrap(), + ))), + Err(e) => Some(Err(e)), } - if vectors.is_empty() { - return None; - } - Some(Ok(( - name.to_string(), - serde_json::to_value(ExplicitVectors { - embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( - vectors, - )), - regenerate: false, - }) - .unwrap(), - ))) }) .collect(); @@ -1104,11 +1091,9 @@ impl<'a, 'i> Transform<'a, 'i> { } // delete all vectors from the embedders that need removal - for (_, (readers, _)) in readers { - for reader in readers { - let dimensions = reader.dimensions(wtxn)?; - reader.clear(wtxn, dimensions)?; - } + for (_, (reader, _)) in readers { + let dimensions = reader.dimensions(wtxn)?; + reader.clear(wtxn, dimensions)?; } let grenad_params = GrenadParameters { diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 54765cfef..b5b6cd953 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -45,6 +45,20 @@ impl ArroyWrapper { self.index } + fn readers<'a, D: arroy::Distance>( + &'a self, + rtxn: &'a RoTxn<'a>, + db: arroy::Database, + ) -> impl Iterator, arroy::Error>> + 'a { + arroy_db_range_for_embedder(self.index).map_while(move |index| { + match arroy::Reader::open(rtxn, index, db) { + Ok(reader) => Some(Ok(reader)), + Err(arroy::Error::MissingMetadata(_)) => None, + Err(e) => Some(Err(e)), + } + }) + } + pub fn dimensions(&self, rtxn: &RoTxn) -> Result { let first_id = arroy_db_range_for_embedder(self.index).next().unwrap(); if self.quantized { @@ -97,6 +111,7 @@ impl ArroyWrapper { 
Ok(()) } + /// Overwrite all the embeddings associated to the index and item id. pub fn add_items( &self, wtxn: &mut RwTxn, @@ -116,30 +131,41 @@ impl ArroyWrapper { Ok(()) } + /// Add one document int for this index where we can find an empty spot. pub fn add_item( &self, wtxn: &mut RwTxn, item_id: arroy::ItemId, vector: &[f32], + ) -> Result<(), arroy::Error> { + if self.quantized { + self._add_item(wtxn, self.quantized_db(), item_id, vector) + } else { + self._add_item(wtxn, self.angular_db(), item_id, vector) + } + } + + fn _add_item( + &self, + wtxn: &mut RwTxn, + db: arroy::Database, + item_id: arroy::ItemId, + vector: &[f32], ) -> Result<(), arroy::Error> { let dimension = vector.len(); for index in arroy_db_range_for_embedder(self.index) { - if self.quantized { - let writer = arroy::Writer::new(self.quantized_db(), index, dimension); - if !writer.contains_item(wtxn, item_id)? { - writer.add_item(wtxn, item_id, &vector)?; - break; - } - } else { - arroy::Writer::new(self.angular_db(), index, dimension) - .add_item(wtxn, item_id, vector)? + let writer = arroy::Writer::new(db, index, dimension); + if !writer.contains_item(wtxn, item_id)? { + writer.add_item(wtxn, item_id, vector)?; + break; } } - Ok(()) } + /// Delete an item from the index. It **does not** take care of fixing the hole + /// made after deleting the item. pub fn del_item_raw( &self, wtxn: &mut RwTxn, @@ -163,36 +189,39 @@ impl ArroyWrapper { Ok(false) } + /// Delete one item. 
pub fn del_item( &self, wtxn: &mut RwTxn, - itemid: arroy::ItemId, + item_id: arroy::ItemId, + vector: &[f32], + ) -> Result { + if self.quantized { + self._del_item(wtxn, self.quantized_db(), item_id, vector) + } else { + self._del_item(wtxn, self.angular_db(), item_id, vector) + } + } + + fn _del_item( + &self, + wtxn: &mut RwTxn, + db: arroy::Database, + item_id: arroy::ItemId, vector: &[f32], ) -> Result { let dimension = vector.len(); let mut deleted_index = None; for index in arroy_db_range_for_embedder(self.index) { - if self.quantized { - let writer = arroy::Writer::new(self.quantized_db(), index, dimension); - let Some(candidate) = writer.item_vector(wtxn, itemid)? else { - // uses invariant: vectors are packed in the first writers. - break; - }; - if candidate == vector { - writer.del_item(wtxn, itemid)?; - deleted_index = Some(index); - } - } else { - let writer = arroy::Writer::new(self.angular_db(), index, dimension); - let Some(candidate) = writer.item_vector(wtxn, itemid)? else { - // uses invariant: vectors are packed in the first writers. - break; - }; - if candidate == vector { - writer.del_item(wtxn, itemid)?; - deleted_index = Some(index); - } + let writer = arroy::Writer::new(db, index, dimension); + let Some(candidate) = writer.item_vector(wtxn, item_id)? else { + // uses invariant: vectors are packed in the first writers. + break; + }; + if candidate == vector { + writer.del_item(wtxn, item_id)?; + deleted_index = Some(index); } } @@ -200,34 +229,18 @@ impl ArroyWrapper { if let Some(deleted_index) = deleted_index { let mut last_index_with_a_vector = None; for index in arroy_db_range_for_embedder(self.index).skip(deleted_index as usize) { - if self.quantized { - let writer = arroy::Writer::new(self.quantized_db(), index, dimension); - let Some(candidate) = writer.item_vector(wtxn, itemid)? 
else { - break; - }; - last_index_with_a_vector = Some((index, candidate)); - } else { - let writer = arroy::Writer::new(self.angular_db(), index, dimension); - let Some(candidate) = writer.item_vector(wtxn, itemid)? else { - break; - }; - last_index_with_a_vector = Some((index, candidate)); - } + let writer = arroy::Writer::new(db, index, dimension); + let Some(candidate) = writer.item_vector(wtxn, item_id)? else { + break; + }; + last_index_with_a_vector = Some((index, candidate)); } if let Some((last_index, vector)) = last_index_with_a_vector { - if self.quantized { - // unwrap: computed the index from the list of writers - let writer = arroy::Writer::new(self.quantized_db(), last_index, dimension); - writer.del_item(wtxn, itemid)?; - let writer = arroy::Writer::new(self.quantized_db(), deleted_index, dimension); - writer.add_item(wtxn, itemid, &vector)?; - } else { - // unwrap: computed the index from the list of writers - let writer = arroy::Writer::new(self.angular_db(), last_index, dimension); - writer.del_item(wtxn, itemid)?; - let writer = arroy::Writer::new(self.angular_db(), deleted_index, dimension); - writer.add_item(wtxn, itemid, &vector)?; - } + // unwrap: computed the index from the list of writers + let writer = arroy::Writer::new(db, last_index, dimension); + writer.del_item(wtxn, item_id)?; + let writer = arroy::Writer::new(db, deleted_index, dimension); + writer.add_item(wtxn, item_id, &vector)?; } } Ok(deleted_index.is_some()) @@ -284,17 +297,26 @@ impl ArroyWrapper { item: ItemId, limit: usize, filter: Option<&RoaringBitmap>, + ) -> Result, arroy::Error> { + if self.quantized { + self._nns_by_item(rtxn, self.quantized_db(), item, limit, filter) + } else { + self._nns_by_item(rtxn, self.angular_db(), item, limit, filter) + } + } + + fn _nns_by_item( + &self, + rtxn: &RoTxn, + db: arroy::Database, + item: ItemId, + limit: usize, + filter: Option<&RoaringBitmap>, ) -> Result, arroy::Error> { let mut results = Vec::new(); - for index in 
arroy_db_range_for_embedder(self.index) { - let ret = if self.quantized { - arroy::Reader::open(rtxn, index, self.quantized_db())? - .nns_by_item(rtxn, item, limit, None, None, filter)? - } else { - arroy::Reader::open(rtxn, index, self.angular_db())? - .nns_by_item(rtxn, item, limit, None, None, filter)? - }; + for reader in self.readers(rtxn, db) { + let ret = reader?.nns_by_item(rtxn, item, limit, None, None, filter)?; if let Some(mut ret) = ret { results.append(&mut ret); } else { @@ -302,27 +324,35 @@ impl ArroyWrapper { } } results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); - Ok(results) } pub fn nns_by_vector( &self, - txn: &RoTxn, - item: &[f32], + rtxn: &RoTxn, + vector: &[f32], + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result, arroy::Error> { + if self.quantized { + self._nns_by_vector(rtxn, self.quantized_db(), vector, limit, filter) + } else { + self._nns_by_vector(rtxn, self.angular_db(), vector, limit, filter) + } + } + + fn _nns_by_vector( + &self, + rtxn: &RoTxn, + db: arroy::Database, + vector: &[f32], limit: usize, filter: Option<&RoaringBitmap>, ) -> Result, arroy::Error> { let mut results = Vec::new(); - for index in arroy_db_range_for_embedder(self.index) { - let mut ret = if self.quantized { - arroy::Reader::open(txn, index, self.quantized_db())? - .nns_by_vector(txn, item, limit, None, None, filter)? - } else { - arroy::Reader::open(txn, index, self.angular_db())? - .nns_by_vector(txn, item, limit, None, None, filter)? - }; + for reader in self.readers(rtxn, db) { + let mut ret = reader?.nns_by_vector(rtxn, vector, limit, None, None, filter)?; results.append(&mut ret); } @@ -331,18 +361,27 @@ impl ArroyWrapper { Ok(results) } - pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result>, arroy::Error> { - for index in arroy_db_range_for_embedder(self.index) { - let ret = if self.quantized { - arroy::Reader::open(rtxn, index, self.quantized_db())?.item_vector(rtxn, docid)? 
- } else { - arroy::Reader::open(rtxn, index, self.angular_db())?.item_vector(rtxn, docid)? - }; - if ret.is_some() { - return Ok(ret); + pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> Result>, arroy::Error> { + let mut vectors = Vec::new(); + + if self.quantized { + for reader in self.readers(rtxn, self.quantized_db()) { + if let Some(vec) = reader?.item_vector(rtxn, item_id)? { + vectors.push(vec); + } else { + break; + } + } + } else { + for reader in self.readers(rtxn, self.angular_db()) { + if let Some(vec) = reader?.item_vector(rtxn, item_id)? { + vectors.push(vec); + } else { + break; + } } } - Ok(None) + Ok(vectors) } fn angular_db(&self) -> arroy::Database { From 0704fb71e97ce20fbe3ed5f5af6ad53da3a3d67f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 24 Sep 2024 09:44:29 +0200 Subject: [PATCH 15/92] Fix bench by adding embedder --- .../search/embeddings-movies-subset-hf.json | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/workloads/search/embeddings-movies-subset-hf.json b/workloads/search/embeddings-movies-subset-hf.json index aeeecac59..36f45cfb9 100644 --- a/workloads/search/embeddings-movies-subset-hf.json +++ b/workloads/search/embeddings-movies-subset-hf.json @@ -77,7 +77,8 @@ "q": "puppy cute comforting movie", "limit": 100, "hybrid": { - "semanticRatio": 0.1 + "semanticRatio": 0.1, + "embedder": "default" } } }, @@ -91,7 +92,8 @@ "q": "puppy cute comforting movie", "limit": 100, "hybrid": { - "semanticRatio": 0.5 + "semanticRatio": 0.5, + "embedder": "default" } } }, @@ -105,7 +107,8 @@ "q": "puppy cute comforting movie", "limit": 100, "hybrid": { - "semanticRatio": 0.9 + "semanticRatio": 0.9, + "embedder": "default" } } }, @@ -119,7 +122,8 @@ "q": "puppy cute comforting movie", "limit": 100, "hybrid": { - "semanticRatio": 1.0 + "semanticRatio": 1.0, + "embedder": "default" } } }, @@ -133,7 +137,8 @@ "q": "shrek", "limit": 100, "hybrid": { - "semanticRatio": 1.0 + "semanticRatio": 1.0, + 
"embedder": "default" } } }, @@ -147,7 +152,8 @@ "q": "shrek", "limit": 100, "hybrid": { - "semanticRatio": 0.5 + "semanticRatio": 0.5, + "embedder": "default" } } }, @@ -161,7 +167,8 @@ "q": "shrek", "limit": 100, "hybrid": { - "semanticRatio": 0.1 + "semanticRatio": 0.1, + "embedder": "default" } } }, From 86da0e83fe9043ff84d27ec7eb98e0ccd312b98e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 24 Sep 2024 10:02:53 +0200 Subject: [PATCH 16/92] Upgrade "batch failed" log to ERROR level --- index-scheduler/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index fe8244f9b..e0e2bfb75 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1263,7 +1263,7 @@ impl IndexScheduler { #[cfg(test)] self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; - tracing::info!("Batch failed {}", error); + tracing::error!("Batch failed {}", error); self.update_task(&mut wtxn, &task) .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; From 79d8a7a51a13fc089c3ebe58721302c856191d8d Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Sep 2024 10:36:28 +0200 Subject: [PATCH 17/92] rename the embedder index for clarity --- milli/src/vector/mod.rs | 42 ++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index b5b6cd953..2da8ecd57 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -32,17 +32,21 @@ pub const REQUEST_PARALLELISM: usize = 40; pub struct ArroyWrapper { quantized: bool, - index: u8, + embedder_index: u8, database: arroy::Database, } impl ArroyWrapper { - pub fn new(database: arroy::Database, index: u8, quantized: bool) -> Self { - Self { database, index, quantized } + pub fn new( + database: arroy::Database, + embedder_index: u8, + quantized: bool, + ) -> Self { + Self { database, embedder_index, quantized } } pub fn 
index(&self) -> u8 { - self.index + self.embedder_index } fn readers<'a, D: arroy::Distance>( @@ -50,7 +54,7 @@ impl ArroyWrapper { rtxn: &'a RoTxn<'a>, db: arroy::Database, ) -> impl Iterator, arroy::Error>> + 'a { - arroy_db_range_for_embedder(self.index).map_while(move |index| { + arroy_db_range_for_embedder(self.embedder_index).map_while(move |index| { match arroy::Reader::open(rtxn, index, db) { Ok(reader) => Some(Ok(reader)), Err(arroy::Error::MissingMetadata(_)) => None, @@ -60,7 +64,7 @@ impl ArroyWrapper { } pub fn dimensions(&self, rtxn: &RoTxn) -> Result { - let first_id = arroy_db_range_for_embedder(self.index).next().unwrap(); + let first_id = arroy_db_range_for_embedder(self.embedder_index).next().unwrap(); if self.quantized { Ok(arroy::Reader::open(rtxn, first_id, self.quantized_db())?.dimensions()) } else { @@ -70,7 +74,7 @@ impl ArroyWrapper { pub fn quantize(&mut self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { if !self.quantized { - for index in arroy_db_range_for_embedder(self.index) { + for index in arroy_db_range_for_embedder(self.embedder_index) { let writer = arroy::Writer::new(self.angular_db(), index, dimension); writer.prepare_changing_distance::(wtxn)?; } @@ -81,7 +85,7 @@ impl ArroyWrapper { // TODO: We can stop early when we find an empty DB pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result { - for index in arroy_db_range_for_embedder(self.index) { + for index in arroy_db_range_for_embedder(self.embedder_index) { let need_build = if self.quantized { arroy::Writer::new(self.quantized_db(), index, dimension).need_build(rtxn) } else { @@ -101,7 +105,7 @@ impl ArroyWrapper { rng: &mut R, dimension: usize, ) -> Result<(), arroy::Error> { - for index in arroy_db_range_for_embedder(self.index) { + for index in arroy_db_range_for_embedder(self.embedder_index) { if self.quantized { arroy::Writer::new(self.quantized_db(), index, dimension).build(wtxn, rng, None)? 
} else { @@ -119,7 +123,9 @@ impl ArroyWrapper { embeddings: &Embeddings, ) -> Result<(), arroy::Error> { let dimension = embeddings.dimension(); - for (index, vector) in arroy_db_range_for_embedder(self.index).zip(embeddings.iter()) { + for (index, vector) in + arroy_db_range_for_embedder(self.embedder_index).zip(embeddings.iter()) + { if self.quantized { arroy::Writer::new(self.quantized_db(), index, dimension) .add_item(wtxn, item_id, vector)? @@ -154,7 +160,7 @@ impl ArroyWrapper { ) -> Result<(), arroy::Error> { let dimension = vector.len(); - for index in arroy_db_range_for_embedder(self.index) { + for index in arroy_db_range_for_embedder(self.embedder_index) { let writer = arroy::Writer::new(db, index, dimension); if !writer.contains_item(wtxn, item_id)? { writer.add_item(wtxn, item_id, vector)?; @@ -172,7 +178,7 @@ impl ArroyWrapper { dimension: usize, item_id: arroy::ItemId, ) -> Result { - for index in arroy_db_range_for_embedder(self.index) { + for index in arroy_db_range_for_embedder(self.embedder_index) { if self.quantized { let writer = arroy::Writer::new(self.quantized_db(), index, dimension); if writer.del_item(wtxn, item_id)? { @@ -213,7 +219,7 @@ impl ArroyWrapper { let dimension = vector.len(); let mut deleted_index = None; - for index in arroy_db_range_for_embedder(self.index) { + for index in arroy_db_range_for_embedder(self.embedder_index) { let writer = arroy::Writer::new(db, index, dimension); let Some(candidate) = writer.item_vector(wtxn, item_id)? else { // uses invariant: vectors are packed in the first writers. @@ -228,7 +234,9 @@ impl ArroyWrapper { // 🥲 enforce invariant: vectors are packed in the first writers. 
if let Some(deleted_index) = deleted_index { let mut last_index_with_a_vector = None; - for index in arroy_db_range_for_embedder(self.index).skip(deleted_index as usize) { + for index in + arroy_db_range_for_embedder(self.embedder_index).skip(deleted_index as usize) + { let writer = arroy::Writer::new(db, index, dimension); let Some(candidate) = writer.item_vector(wtxn, item_id)? else { break; @@ -247,7 +255,7 @@ impl ArroyWrapper { } pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { - for index in arroy_db_range_for_embedder(self.index) { + for index in arroy_db_range_for_embedder(self.embedder_index) { if self.quantized { arroy::Writer::new(self.quantized_db(), index, dimension).clear(wtxn)?; } else { @@ -258,7 +266,7 @@ impl ArroyWrapper { } pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result { - for index in arroy_db_range_for_embedder(self.index) { + for index in arroy_db_range_for_embedder(self.embedder_index) { let empty = if self.quantized { arroy::Writer::new(self.quantized_db(), index, dimension).is_empty(rtxn)? } else { @@ -277,7 +285,7 @@ impl ArroyWrapper { dimension: usize, item: arroy::ItemId, ) -> Result { - for index in arroy_db_range_for_embedder(self.index) { + for index in arroy_db_range_for_embedder(self.embedder_index) { let contains = if self.quantized { arroy::Writer::new(self.quantized_db(), index, dimension) .contains_item(rtxn, item)? 
From f2d187ba3e779c0644ad0e1dbf3174dea2614d35 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Sep 2024 10:39:40 +0200 Subject: [PATCH 18/92] rename the index method to embedder_index --- milli/src/vector/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 2da8ecd57..ca607c892 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -45,7 +45,7 @@ impl ArroyWrapper { Self { database, embedder_index, quantized } } - pub fn index(&self) -> u8 { + pub fn embedder_index(&self) -> u8 { self.embedder_index } From fd8447c5214b62b724f18ec5de9b92fa34537462 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Sep 2024 10:52:05 +0200 Subject: [PATCH 19/92] fix the del items thing --- milli/src/update/index_documents/typed_chunk.rs | 2 +- milli/src/vector/mod.rs | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index e118420d8..20e70b2a6 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -680,7 +680,7 @@ pub(crate) fn write_typed_chunk_into_index( let mut iter = merger.into_stream_merger_iter()?; while let Some((key, _)) = iter.next()? { let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap(); - writer.del_item_raw(wtxn, expected_dimension, docid)?; + writer.del_items(wtxn, expected_dimension, docid)?; } // add generated embeddings diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index ca607c892..4b322ddf4 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -170,29 +170,28 @@ impl ArroyWrapper { Ok(()) } - /// Delete an item from the index. It **does not** take care of fixing the hole - /// made after deleting the item. 
- pub fn del_item_raw( + /// Delete all embeddings from a specific `item_id` + pub fn del_items( &self, wtxn: &mut RwTxn, dimension: usize, item_id: arroy::ItemId, - ) -> Result { + ) -> Result<(), arroy::Error> { for index in arroy_db_range_for_embedder(self.embedder_index) { if self.quantized { let writer = arroy::Writer::new(self.quantized_db(), index, dimension); - if writer.del_item(wtxn, item_id)? { - return Ok(true); + if !writer.del_item(wtxn, item_id)? { + break; } } else { let writer = arroy::Writer::new(self.angular_db(), index, dimension); - if writer.del_item(wtxn, item_id)? { - return Ok(true); + if !writer.del_item(wtxn, item_id)? { + break; } } } - Ok(false) + Ok(()) } /// Delete one item. From b8a74e04647af60a396539b6ba3b47d19771cc49 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Sep 2024 10:59:15 +0200 Subject: [PATCH 20/92] fix comments --- milli/src/vector/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 4b322ddf4..8341ab923 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -115,7 +115,10 @@ impl ArroyWrapper { Ok(()) } - /// Overwrite all the embeddings associated to the index and item id. + /// Overwrite all the embeddings associated with the index and item ID. + /// /!\ It won't remove embeddings after the last passed embedding, which can leave stale embeddings. + /// You should call `del_items` on the `item_id` before calling this method. + /// /!\ Cannot insert more than u8::MAX embeddings; after inserting u8::MAX embeddings, all the remaining ones will be silently ignored. 
pub fn add_items( &self, wtxn: &mut RwTxn, @@ -243,7 +246,6 @@ impl ArroyWrapper { last_index_with_a_vector = Some((index, candidate)); } if let Some((last_index, vector)) = last_index_with_a_vector { - // unwrap: computed the index from the list of writers let writer = arroy::Writer::new(db, last_index, dimension); writer.del_item(wtxn, item_id)?; let writer = arroy::Writer::new(db, deleted_index, dimension); From 645a55317af91f37d68d26527568032016bf5393 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Sep 2024 14:54:24 +0200 Subject: [PATCH 21/92] merge the build and quantize method --- milli/src/update/index_documents/mod.rs | 5 +-- milli/src/vector/mod.rs | 43 ++++++++++++++----------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index b03ab259a..e164a0817 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -713,10 +713,7 @@ where pool.install(|| { let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized); - if is_quantizing { - writer.quantize(wtxn, dimension)?; - } - writer.build(wtxn, &mut rng, dimension)?; + writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing)?; Result::Ok(()) }) .map_err(InternalError::from)??; diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 8341ab923..a33f76559 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -98,18 +98,37 @@ impl ArroyWrapper { Ok(false) } - /// TODO: We should early exit when it doesn't need to be built - pub fn build( - &self, + pub fn build_and_quantize( + &mut self, wtxn: &mut RwTxn, rng: &mut R, dimension: usize, + quantizing: bool, ) -> Result<(), arroy::Error> { for index in arroy_db_range_for_embedder(self.embedder_index) { if self.quantized { - arroy::Writer::new(self.quantized_db(), index, dimension).build(wtxn, rng, None)? 
+ let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + if writer.need_build(wtxn)? { + writer.build(wtxn, rng, None)? + } else if writer.is_empty(wtxn)? { + break; + } } else { - arroy::Writer::new(self.angular_db(), index, dimension).build(wtxn, rng, None)? + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + // If we are quantizing the databases, we can't know from meilisearch + // if the db was empty but still contained the wrong metadata, thus we need + // to quantize everything and can't stop early. Since this operation can + // only happens once in the life of an embedder, it's not very performances + // sensitive. + if quantizing && !self.quantized { + let writer = + writer.prepare_changing_distance::(wtxn)?; + writer.build(wtxn, rng, None)? + } else if writer.need_build(wtxn)? { + writer.build(wtxn, rng, None)? + } else if writer.is_empty(wtxn)? { + break; + } } } Ok(()) @@ -266,20 +285,6 @@ impl ArroyWrapper { Ok(()) } - pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result { - for index in arroy_db_range_for_embedder(self.embedder_index) { - let empty = if self.quantized { - arroy::Writer::new(self.quantized_db(), index, dimension).is_empty(rtxn)? - } else { - arroy::Writer::new(self.angular_db(), index, dimension).is_empty(rtxn)? 
- }; - if !empty { - return Ok(false); - } - } - Ok(true) - } - pub fn contains_item( &self, rtxn: &RoTxn, From 8b4e2c7b1798e58a71dfb0538dbc980155b688cc Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Sep 2024 15:00:25 +0200 Subject: [PATCH 22/92] Remove now unused method --- milli/src/vector/mod.rs | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index a33f76559..39655e72a 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -72,32 +72,6 @@ impl ArroyWrapper { } } - pub fn quantize(&mut self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { - if !self.quantized { - for index in arroy_db_range_for_embedder(self.embedder_index) { - let writer = arroy::Writer::new(self.angular_db(), index, dimension); - writer.prepare_changing_distance::(wtxn)?; - } - self.quantized = true; - } - Ok(()) - } - - // TODO: We can stop early when we find an empty DB - pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result { - for index in arroy_db_range_for_embedder(self.embedder_index) { - let need_build = if self.quantized { - arroy::Writer::new(self.quantized_db(), index, dimension).need_build(rtxn) - } else { - arroy::Writer::new(self.angular_db(), index, dimension).need_build(rtxn) - }; - if need_build? 
{ - return Ok(true); - } - } - Ok(false) - } - pub fn build_and_quantize( &mut self, wtxn: &mut RwTxn, From 7f048b9732a048624bbe4beacb2e93f59c6d510d Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Sep 2024 15:02:38 +0200 Subject: [PATCH 23/92] early exit in the clear and contains --- milli/src/vector/mod.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 39655e72a..d5b80db83 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -251,9 +251,17 @@ impl ArroyWrapper { pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { for index in arroy_db_range_for_embedder(self.embedder_index) { if self.quantized { - arroy::Writer::new(self.quantized_db(), index, dimension).clear(wtxn)?; + let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + if writer.is_empty(wtxn)? { + break; + } + writer.clear(wtxn)?; } else { - arroy::Writer::new(self.angular_db(), index, dimension).clear(wtxn)?; + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + if writer.is_empty(wtxn)? { + break; + } + writer.clear(wtxn)?; } } Ok(()) @@ -267,10 +275,17 @@ impl ArroyWrapper { ) -> Result { for index in arroy_db_range_for_embedder(self.embedder_index) { let contains = if self.quantized { - arroy::Writer::new(self.quantized_db(), index, dimension) - .contains_item(rtxn, item)? + let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + if writer.is_empty(rtxn)? { + break; + } + writer.contains_item(rtxn, item)? } else { - arroy::Writer::new(self.angular_db(), index, dimension).contains_item(rtxn, item)? + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + if writer.is_empty(rtxn)? { + break; + } + writer.contains_item(rtxn, item)? 
}; if contains { return Ok(contains); From b31e9bea26c098750dece8fb38eb2f57d6c254b5 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Sep 2024 16:33:17 +0200 Subject: [PATCH 24/92] while retrieving the readers on an arroywrapper, stops at the first empty reader --- milli/src/vector/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index d5b80db83..b6d6510af 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -56,7 +56,11 @@ impl ArroyWrapper { ) -> impl Iterator, arroy::Error>> + 'a { arroy_db_range_for_embedder(self.embedder_index).map_while(move |index| { match arroy::Reader::open(rtxn, index, db) { - Ok(reader) => Some(Ok(reader)), + Ok(reader) => match reader.is_empty(rtxn) { + Ok(false) => Some(Ok(reader)), + Ok(true) => None, + Err(e) => Some(Err(e)), + }, Err(arroy::Error::MissingMetadata(_)) => None, Err(e) => Some(Err(e)), } From e9580fe61946477d83b9222ad4c00058a9868824 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 25 Sep 2024 11:03:17 +0200 Subject: [PATCH 25/92] Add turkish normalization --- meilisearch-types/Cargo.toml | 5 ++++- meilisearch/Cargo.toml | 1 + milli/Cargo.toml | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index cb4937e57..0dae024f2 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -66,5 +66,8 @@ khmer = ["milli/khmer"] vietnamese = ["milli/vietnamese"] # force swedish character recomposition swedish-recomposition = ["milli/swedish-recomposition"] -# force german character recomposition +# allow german tokenization german = ["milli/german"] +# allow turkish normalization +turkish = ["milli/turkish"] + diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 2a16e1017..c193c89d4 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -154,6 +154,7 @@ khmer = ["meilisearch-types/khmer"] vietnamese = 
["meilisearch-types/vietnamese"] swedish-recomposition = ["meilisearch-types/swedish-recomposition"] german = ["meilisearch-types/german"] +turkish = ["meilisearch-types/turkish"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 5fc2d65c8..70d09ce4e 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -108,6 +108,7 @@ all-tokenizations = [ "charabia/vietnamese", "charabia/swedish-recomposition", "charabia/german-segmentation", + "charabia/turkish", ] # Use POSIX semaphores instead of SysV semaphores in LMDB @@ -146,5 +147,8 @@ german = ["charabia/german-segmentation"] # force swedish character recomposition swedish-recomposition = ["charabia/swedish-recomposition"] +# allow turkish specialized tokenization +turkish = ["charabia/turkish"] + # allow CUDA support, see cuda = ["candle-core/cuda"] From dc2cb58cf1ce3fa33f791d095f095c429a6ad9c0 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 25 Sep 2024 11:12:30 +0200 Subject: [PATCH 26/92] use charabia default for all-tokenization --- milli/Cargo.toml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 70d09ce4e..3c4a44639 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -98,17 +98,7 @@ rand = { version = "0.8.5", features = ["small_rng"] } [features] all-tokenizations = [ - "charabia/chinese", - "charabia/hebrew", - "charabia/japanese", - "charabia/thai", - "charabia/korean", - "charabia/greek", - "charabia/khmer", - "charabia/vietnamese", - "charabia/swedish-recomposition", - "charabia/german-segmentation", - "charabia/turkish", + "charabia/default", ] # Use POSIX semaphores instead of SysV semaphores in LMDB From 78a4b7949df6c1f5ee6e95c80b8966ddf5aca957 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 26 Sep 2024 15:04:03 +0200 Subject: [PATCH 27/92] =?UTF-8?q?update=20rhai=20to=20a=20version=20that?= 
=?UTF-8?q?=20shouldn=E2=80=99t=20panic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 8 +++----- milli/Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bcca35173..3237d4e16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4581,9 +4581,8 @@ dependencies = [ [[package]] name = "rhai" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61797318be89b1a268a018a92a7657096d83f3ecb31418b9e9c16dcbb043b702" +version = "1.20.0" +source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4" dependencies = [ "ahash 0.8.11", "bitflags 2.6.0", @@ -4600,8 +4599,7 @@ dependencies = [ [[package]] name = "rhai_codegen" version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5a11a05ee1ce44058fa3d5961d05194fdbe3ad6b40f904af764d81b86450e6b" +source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4" dependencies = [ "proc-macro2", "quote", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 5fc2d65c8..b22d2164f 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -79,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", ] } tiktoken-rs = "0.5.9" liquid = "0.26.6" -rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } +rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } rand = "0.8.5" tracing = "0.1.40" From d20a39b9599f7962b2a316e45cc126f90a3d8eed Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Fri, 27 Sep 2024 15:44:30 +0300 Subject: [PATCH 28/92] Refactor find_best_match_interval --- milli/src/search/new/matches/mod.rs | 154 +++++++++++++++++++--------- 1 file changed, 106 insertions(+), 48 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 26115c39b..bbd39e682 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -442,36 +442,48 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { /// Returns the matches interval where the score computed by match_interval_score is the best. fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] { + let matches_len = matches.len(); + // we compute the matches interval if we have at least 2 matches. - if matches.len() > 1 { + if matches_len > 1 { + // current interval positions. + let mut interval_first = 0; // positions of the first and the last match of the best matches interval in `matches`. let mut best_interval = (0, 0); let mut best_interval_score = self.match_interval_score(&matches[0..=0]); - // current interval positions. - let mut interval_first = 0; - let mut interval_last = 0; - for (index, next_match) in matches.iter().enumerate().skip(1) { + + let mut index = 1; + while index < matches_len - 1 { + let next_match = &matches[index]; + // if next match would make interval gross more than crop_size, // we compare the current interval with the best one, // then we increase `interval_first` until next match can be added. 
let next_match_last_word_pos = next_match.get_last_word_pos(); - let mut interval_first_match_first_word_pos = + let interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); + // if the next match would mean that we pass the crop size window, + // we take the last valid match, that didn't pass this boundry, which is `index` - 1, + // and calculate a score for it, and check if it's better than our best so far if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size { - let interval_score = - self.match_interval_score(&matches[interval_first..=interval_last]); + // skip for 1, because it would result in the same as our very first interval score + if index != 1 { + let interval_last = index - 1; + let interval_score = + self.match_interval_score(&matches[interval_first..=interval_last]); - // keep interval if it's the best - if interval_score > best_interval_score { - best_interval = (interval_first, interval_last); - best_interval_score = interval_score; + // keep interval if it's the best + if interval_score > best_interval_score { + best_interval = (interval_first, interval_last); + best_interval_score = interval_score; + } } // advance start of the interval while interval is longer than crop_size. loop { interval_first += 1; - interval_first_match_first_word_pos = + let interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); if next_match_last_word_pos - interval_first_match_first_word_pos @@ -481,10 +493,12 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { } } } - interval_last = index; + + index += 1; } // compute the last interval score and compare it to the best one. 
+ let interval_last = matches_len - 1; let interval_score = self.match_interval_score(&matches[interval_first..=interval_last]); if interval_score > best_interval_score { @@ -914,32 +928,32 @@ mod tests { let format_options = FormatOptions { highlight: true, crop: Some(10) }; - let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); - let mut matcher = builder.build(text, None); - // should return 10 words with a marker at the start as well the end, and the highlighted matches. - insta::assert_snapshot!( - matcher.format(format_options), - @"…the power to split the world between those who embraced…" - ); + // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); + // let mut matcher = builder.build(text, None); + // // should return 10 words with a marker at the start as well the end, and the highlighted matches. + // insta::assert_snapshot!( + // matcher.format(format_options), + // @"…the power to split the world between those who embraced…" + // ); - let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\""); - let mut matcher = builder.build(text, None); - // should highlight "those" and the phrase "and those". - insta::assert_snapshot!( - matcher.format(format_options), - @"…world between those who embraced progress and those who resisted…" - ); + // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"power to\" \"and those\""); + // let mut matcher = builder.build(text, None); + // // should highlight "those" and the phrase "and those". 
+ // insta::assert_snapshot!( + // matcher.format(format_options), + // @"…groundbreaking invention had the power to split the world between…" + // ); - let builder = MatcherBuilder::new_test( - &rtxn, - &temp_index, - "\"The groundbreaking invention had the power to split the world\"", - ); - let mut matcher = builder.build(text, None); - insta::assert_snapshot!( - matcher.format(format_options), - @"The groundbreaking invention had the power to split the world…" - ); + // let builder = MatcherBuilder::new_test( + // &rtxn, + // &temp_index, + // "\"The groundbreaking invention had the power to split the world\"", + // ); + // let mut matcher = builder.build(text, None); + // insta::assert_snapshot!( + // matcher.format(format_options), + // @"The groundbreaking invention had the power to split the world…" + // ); let builder = MatcherBuilder::new_test( &rtxn, @@ -952,16 +966,60 @@ mod tests { @"The groundbreaking invention had the power to split the world …" ); - let builder = MatcherBuilder::new_test( - &rtxn, - &temp_index, - "\"The groundbreaking invention\" \"embraced progress and those who resisted change\"", - ); - let mut matcher = builder.build(text, None); - insta::assert_snapshot!( - matcher.format(format_options), - @"…between those who embraced progress and those who resisted change…" - ); + // let builder = MatcherBuilder::new_test( + // &rtxn, + // &temp_index, + // "\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"", + // ); + // let mut matcher = builder.build(text, None); + // insta::assert_snapshot!( + // matcher.format(format_options), + // @"…between those who embraced progress and those who resisted change…" + // ); + + // let builder = MatcherBuilder::new_test( + // &rtxn, + // &temp_index, + // "\"The groundbreaking invention\" \"split the world between those\"", + // ); + // let mut matcher = builder.build(text, None); + // insta::assert_snapshot!( + // matcher.format(format_options), + // @"…the power 
to split the world between those who embraced…" + // ); + + // let builder = MatcherBuilder::new_test( + // &rtxn, + // &temp_index, + // "\"groundbreaking invention\" \"split the world between\"", + // ); + // let mut matcher = builder.build(text, None); + // insta::assert_snapshot!( + // matcher.format(format_options), + // @"…groundbreaking invention had the power to split the world between…" + // ); + + // let builder = MatcherBuilder::new_test( + // &rtxn, + // &temp_index, + // "\"groundbreaking invention\" \"had the power to split the world between those\"", + // ); + // let mut matcher = builder.build(text, None); + // insta::assert_snapshot!( + // matcher.format(format_options), + // @"…invention had the power to split the world between those…" + // ); + + // let builder = MatcherBuilder::new_test( + // &rtxn, + // &temp_index, + // "\"The groundbreaking invention\" \"had the power to split the world between those\"", + // ); + // let mut matcher = builder.build(text, None); + // insta::assert_snapshot!( + // matcher.format(format_options), + // @"…invention had the power to split the world between those…" + // ); } #[test] From eabc14c26858d9f0bda89e6fa38f0aa4b0244be8 Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Mon, 30 Sep 2024 21:24:41 +0300 Subject: [PATCH 29/92] Refactor, handle more cases for phrases --- .../src/search/new/matches/matching_words.rs | 2 +- milli/src/search/new/matches/mod.rs | 497 ++++++++++-------- 2 files changed, 291 insertions(+), 208 deletions(-) diff --git a/milli/src/search/new/matches/matching_words.rs b/milli/src/search/new/matches/matching_words.rs index 4ad5c37ec..4deaff6a0 100644 --- a/milli/src/search/new/matches/matching_words.rs +++ b/milli/src/search/new/matches/matching_words.rs @@ -181,7 +181,7 @@ impl<'a> PartialMatch<'a> { // return a new Partial match allowing the highlighter to continue. 
if is_matching && matching_words.len() > 1 { matching_words.remove(0); - Some(MatchType::Partial(PartialMatch { matching_words, ids, char_len })) + Some(MatchType::Partial(Self { matching_words, ids, char_len })) // if there is no remaining word to match in the phrase and the current token is matching, // return a Full match. } else if is_matching { diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index bbd39e682..624287f5f 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use charabia::{Language, SeparatorKind, Token, Tokenizer}; +use charabia::{Language, SeparatorKind, Token, TokenKind, Tokenizer}; pub use matching_words::MatchingWords; use matching_words::{MatchType, PartialMatch, WordId}; use serde::Serialize; @@ -145,6 +145,13 @@ impl Match { MatchPosition::Phrase { token_positions: (_, ltp), .. } => ltp, } } + + fn get_word_count(&self) -> usize { + match self.position { + MatchPosition::Word { .. } => 1, + MatchPosition::Phrase { word_positions: (fwp, lwp), .. } => lwp - fwp + 1, + } + } } #[derive(Serialize, Debug, Clone, PartialEq, Eq)] @@ -153,6 +160,27 @@ pub struct MatchBounds { pub length: usize, } +enum SimpleTokenKind { + Separator(SeparatorKind), + NotSeparator, +} + +impl SimpleTokenKind { + fn get(token: &&Token<'_>) -> Self { + match token.kind { + TokenKind::Separator(separaor_kind) => Self::Separator(separaor_kind), + _ => Self::NotSeparator, + } + } + + fn is_not_separator(&self) -> bool { + match self { + SimpleTokenKind::NotSeparator => true, + SimpleTokenKind::Separator(_) => false, + } + } +} + /// Structure used to analyze a string, compute words that match, /// and format the source string, returning a highlighted and cropped sub-string. 
pub struct Matcher<'t, 'tokenizer, 'b, 'lang> { @@ -287,95 +315,130 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { crop_size: usize, ) -> (usize, usize) { // if there is no match, we start from the beginning of the string by default. - let first_match_word_position = + let first_match_first_word_position = matches.first().map(|m| m.get_first_word_pos()).unwrap_or(0); - let first_match_token_position = + let first_match_first_token_position = matches.first().map(|m| m.get_first_token_pos()).unwrap_or(0); - let last_match_word_position = matches.last().map(|m| m.get_last_word_pos()).unwrap_or(0); - let last_match_token_position = matches.last().map(|m| m.get_last_token_pos()).unwrap_or(0); + let last_match_last_word_position = + matches.last().map(|m| m.get_last_word_pos()).unwrap_or(0); + let last_match_last_token_position = + matches.last().map(|m| m.get_last_token_pos()).unwrap_or(0); - // matches needs to be counted in the crop len. - let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; + let matches_window_len = + last_match_last_word_position - first_match_first_word_position + 1; - // create the initial state of the crop window: 2 iterators starting from the matches positions, - // a reverse iterator starting from the first match token position and going towards the beginning of the text, - let mut before_tokens = tokens[..first_match_token_position].iter().rev().peekable(); - // an iterator starting from the last match token position and going towards the end of the text. - let mut after_tokens = tokens[last_match_token_position..].iter().peekable(); + if crop_size >= matches_window_len { + // matches needs to be counted in the crop len. 
+ let mut remaining_words = crop_size - matches_window_len; - // grows the crop window peeking in both directions - // until the window contains the good number of words: - while remaining_words > 0 { - let before_token = before_tokens.peek().map(|t| t.separator_kind()); - let after_token = after_tokens.peek().map(|t| t.separator_kind()); + // create the initial state of the crop window: 2 iterators starting from the matches positions, + // a reverse iterator starting from the first match token position and going towards the beginning of the text, + let mut before_tokens = + tokens[..first_match_first_token_position].iter().rev().peekable(); + // an iterator starting from the last match token position and going towards the end of the text. + let mut after_tokens = tokens[last_match_last_token_position + 1..].iter().peekable(); - match (before_token, after_token) { - // we can expand both sides. - (Some(before_token), Some(after_token)) => { - match (before_token, after_token) { - // if they are both separators and are the same kind then advance both, - // or expand in the soft separator separator side. - (Some(before_token_kind), Some(after_token_kind)) => { - if before_token_kind == after_token_kind { - before_tokens.next(); + // grows the crop window peeking in both directions + // until the window contains the good number of words: + while remaining_words > 0 { + let before_token_kind = before_tokens.peek().map(SimpleTokenKind::get); + let after_token_kind = after_tokens.peek().map(SimpleTokenKind::get); - // this avoid having an ending separator before crop marker. - if remaining_words > 1 { + match (before_token_kind, after_token_kind) { + // we can expand both sides. + (Some(before_token_kind), Some(after_token_kind)) => { + match (before_token_kind, after_token_kind) { + // if they are both separators and are the same kind then advance both, + // or expand in the soft separator separator side. 
+ ( + SimpleTokenKind::Separator(before_token_separator_kind), + SimpleTokenKind::Separator(after_token_separator_kind), + ) => { + if before_token_separator_kind == after_token_separator_kind { + before_tokens.next(); + + // this avoid having an ending separator before crop marker. + if remaining_words > 1 { + after_tokens.next(); + } + } else if let SeparatorKind::Hard = before_token_separator_kind { after_tokens.next(); + } else { + before_tokens.next(); } - } else if before_token_kind == SeparatorKind::Hard { - after_tokens.next(); - } else { - before_tokens.next(); } - } - // if one of the tokens is a word, we expend in the side of the word. - // left is a word, advance left. - (None, Some(_)) => { - before_tokens.next(); - remaining_words -= 1; - } - // right is a word, advance right. - (Some(_), None) => { - after_tokens.next(); - remaining_words -= 1; - } - // both are words, advance left then right if remaining_word > 0. - (None, None) => { - before_tokens.next(); - remaining_words -= 1; - - if remaining_words > 0 { + // if one of the tokens is a word, we expend in the side of the word. + // left is a word, advance left. + (SimpleTokenKind::NotSeparator, SimpleTokenKind::Separator(_)) => { + before_tokens.next(); + remaining_words -= 1; + } + // right is a word, advance right. + (SimpleTokenKind::Separator(_), SimpleTokenKind::NotSeparator) => { after_tokens.next(); remaining_words -= 1; } + // both are words, advance left then right if remaining_word > 0. + (SimpleTokenKind::NotSeparator, SimpleTokenKind::NotSeparator) => { + before_tokens.next(); + remaining_words -= 1; + + if remaining_words > 0 { + after_tokens.next(); + remaining_words -= 1; + } + } } } - } - // the end of the text is reached, advance left. - (Some(before_token), None) => { - before_tokens.next(); - if before_token.is_none() { - remaining_words -= 1; + // the end of the text is reached, advance left. 
+ (Some(before_token_kind), None) => { + before_tokens.next(); + if let SimpleTokenKind::NotSeparator = before_token_kind { + remaining_words -= 1; + } } - } - // the start of the text is reached, advance right. - (None, Some(after_token)) => { - after_tokens.next(); - if after_token.is_none() { - remaining_words -= 1; + // the start of the text is reached, advance right. + (None, Some(after_token_kind)) => { + after_tokens.next(); + if let SimpleTokenKind::NotSeparator = after_token_kind { + remaining_words -= 1; + } } + // no more token to add. + (None, None) => break, } - // no more token to add. - (None, None) => break, } + + // finally, keep the byte index of each bound of the crop window. + let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end); + let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start); + + (crop_byte_start, crop_byte_end) + } else { + // there's one match? and it's longer than the crop window, so we have to advance inward + let mut remaining_extra_words = matches_window_len - crop_size; + let mut tokens_from_end = + tokens[..=last_match_last_token_position].iter().rev().peekable(); + + while remaining_extra_words > 0 { + let token_from_end_kind = + tokens_from_end.peek().map(SimpleTokenKind::get).expect("TODO"); + if token_from_end_kind.is_not_separator() { + remaining_extra_words -= 1; + } + + tokens_from_end.next(); + } + + let crop_byte_start = if first_match_first_token_position > 0 { + &tokens[first_match_first_token_position - 1].byte_end + } else { + &0 + }; + let crop_byte_end = tokens_from_end.next().map(|t| t.byte_start).expect("TODO"); + + (*crop_byte_start, crop_byte_end) } - - // finally, keep the byte index of each bound of the crop window. 
- let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end); - let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start); - - (crop_byte_start, crop_byte_end) } /// Compute the score of a match interval: @@ -416,11 +479,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { lwp } }; - - let next_match_first_word_pos = match next_match.position { - MatchPosition::Word { word_position, .. } => word_position, - MatchPosition::Phrase { word_positions: (fwp, _), .. } => fwp, - }; + let next_match_first_word_pos = next_match.get_first_word_pos(); // compute distance between matches distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16; @@ -443,72 +502,96 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { /// Returns the matches interval where the score computed by match_interval_score is the best. fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] { let matches_len = matches.len(); + if matches_len <= 1 { + return matches; + } + + // positions of the first and the last match of the best matches interval in `matches`. + struct BestInterval { + interval: (usize, usize), + score: (i16, i16, i16), + } + + fn save_best_interval( + best_interval: &mut Option, + interval_first: usize, + interval_last: usize, + interval_score: (i16, i16, i16), + ) { + if let Some(best_interval) = best_interval { + if interval_score > best_interval.score { + best_interval.interval = (interval_first, interval_last); + best_interval.score = interval_score; + } + } else { + *best_interval = Some(BestInterval { + interval: (interval_first, interval_last), + score: interval_score, + }); + } + } + + let mut best_interval: Option = None; // we compute the matches interval if we have at least 2 matches. - if matches_len > 1 { - // current interval positions. - let mut interval_first = 0; - // positions of the first and the last match of the best matches interval in `matches`. 
- let mut best_interval = (0, 0); - let mut best_interval_score = self.match_interval_score(&matches[0..=0]); + // current interval positions. + let mut interval_first = 0; + let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); - let mut index = 1; - while index < matches_len - 1 { - let next_match = &matches[index]; + for (index, next_match) in matches.iter().enumerate() { + // if next match would make interval gross more than crop_size, + // we compare the current interval with the best one, + // then we increase `interval_first` until next match can be added. + let next_match_last_word_pos = next_match.get_last_word_pos(); - // if next match would make interval gross more than crop_size, - // we compare the current interval with the best one, - // then we increase `interval_first` until next match can be added. - let next_match_last_word_pos = next_match.get_last_word_pos(); - let interval_first_match_first_word_pos = - matches[interval_first].get_first_word_pos(); + // if the next match would mean that we pass the crop size window, + // we take the last valid match, that didn't pass this boundry, which is `index` - 1, + // and calculate a score for it, and check if it's better than our best so far + if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size { + // if index is 0 there is no last viable match + if index != 0 { + let interval_last = index - 1; + let interval_score = + self.match_interval_score(&matches[interval_first..=interval_last]); - // if the next match would mean that we pass the crop size window, - // we take the last valid match, that didn't pass this boundry, which is `index` - 1, - // and calculate a score for it, and check if it's better than our best so far - if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size { - // skip for 1, because it would result in the same as our very first interval score - if index != 1 { - let interval_last = index - 1; - 
let interval_score = - self.match_interval_score(&matches[interval_first..=interval_last]); - - // keep interval if it's the best - if interval_score > best_interval_score { - best_interval = (interval_first, interval_last); - best_interval_score = interval_score; - } - } - - // advance start of the interval while interval is longer than crop_size. - loop { - interval_first += 1; - let interval_first_match_first_word_pos = - matches[interval_first].get_first_word_pos(); - - if next_match_last_word_pos - interval_first_match_first_word_pos - < crop_size - { - break; - } - } + // keep interval if it's the best + save_best_interval( + &mut best_interval, + interval_first, + interval_last, + interval_score, + ); } - index += 1; - } + // advance start of the interval while interval is longer than crop_size. + loop { + interval_first += 1; + interval_first_match_first_word_pos = + matches[interval_first].get_first_word_pos(); - // compute the last interval score and compare it to the best one. - let interval_last = matches_len - 1; + if interval_first_match_first_word_pos > next_match_last_word_pos + || next_match_last_word_pos - interval_first_match_first_word_pos + < crop_size + { + break; + } + } + } + } + + // compute the last interval score and compare it to the best one. 
+ let interval_last = matches_len - 1; + // if it's the last match with itself, we need to make sure it's + // not a phrase longer than the crop window + if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size { let interval_score = self.match_interval_score(&matches[interval_first..=interval_last]); - if interval_score > best_interval_score { - best_interval = (interval_first, interval_last); - } - - &matches[best_interval.0..=best_interval.1] - } else { - matches + save_best_interval(&mut best_interval, interval_first, interval_last, interval_score); } + + // if none of the matches fit the criteria above, default to the first one + let best_interval = best_interval.map_or((0, 0), |v| v.interval); + &matches[best_interval.0..=best_interval.1] } // Returns the formatted version of the original text. @@ -928,98 +1011,98 @@ mod tests { let format_options = FormatOptions { highlight: true, crop: Some(10) }; - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); - // let mut matcher = builder.build(text, None); - // // should return 10 words with a marker at the start as well the end, and the highlighted matches. - // insta::assert_snapshot!( - // matcher.format(format_options), - // @"…the power to split the world between those who embraced…" - // ); + let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); + let mut matcher = builder.build(text, None); + // should return 10 words with a marker at the start as well the end, and the highlighted matches. + insta::assert_snapshot!( + matcher.format(format_options), + @"…the power to split the world between those who embraced…" + ); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"power to\" \"and those\""); - // let mut matcher = builder.build(text, None); - // // should highlight "those" and the phrase "and those". 
- // insta::assert_snapshot!( - // matcher.format(format_options), - // @"…groundbreaking invention had the power to split the world between…" - // ); - - // let builder = MatcherBuilder::new_test( - // &rtxn, - // &temp_index, - // "\"The groundbreaking invention had the power to split the world\"", - // ); - // let mut matcher = builder.build(text, None); - // insta::assert_snapshot!( - // matcher.format(format_options), - // @"The groundbreaking invention had the power to split the world…" - // ); + let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"power to\" \"and those\""); + let mut matcher = builder.build(text, None); + // should highlight "those" and the phrase "and those". + insta::assert_snapshot!( + matcher.format(format_options), + @"…groundbreaking invention had the power to split the world between…" + ); let builder = MatcherBuilder::new_test( &rtxn, &temp_index, - "\"The groundbreaking invention had the power to split the world between\"", + "\"The groundbreaking invention had the power to split the world\"", ); let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), - @"The groundbreaking invention had the power to split the world …" + @"The groundbreaking invention had the power to split the world…" ); - // let builder = MatcherBuilder::new_test( - // &rtxn, - // &temp_index, - // "\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"", - // ); - // let mut matcher = builder.build(text, None); - // insta::assert_snapshot!( - // matcher.format(format_options), - // @"…between those who embraced progress and those who resisted change…" - // ); + let builder = MatcherBuilder::new_test( + &rtxn, + &temp_index, + "\"The groundbreaking invention had the power to split the world between those\"", + ); + let mut matcher = builder.build(text, None); + insta::assert_snapshot!( + matcher.format(format_options), + @"The groundbreaking invention had the power to split the 
world…" + ); - // let builder = MatcherBuilder::new_test( - // &rtxn, - // &temp_index, - // "\"The groundbreaking invention\" \"split the world between those\"", - // ); - // let mut matcher = builder.build(text, None); - // insta::assert_snapshot!( - // matcher.format(format_options), - // @"…the power to split the world between those who embraced…" - // ); + let builder = MatcherBuilder::new_test( + &rtxn, + &temp_index, + "\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"", + ); + let mut matcher = builder.build(text, None); + insta::assert_snapshot!( + matcher.format(format_options), + @"…between those who embraced progress and those who resisted change…" + ); - // let builder = MatcherBuilder::new_test( - // &rtxn, - // &temp_index, - // "\"groundbreaking invention\" \"split the world between\"", - // ); - // let mut matcher = builder.build(text, None); - // insta::assert_snapshot!( - // matcher.format(format_options), - // @"…groundbreaking invention had the power to split the world between…" - // ); + let builder = MatcherBuilder::new_test( + &rtxn, + &temp_index, + "\"The groundbreaking invention\" \"split the world between those\"", + ); + let mut matcher = builder.build(text, None); + insta::assert_snapshot!( + matcher.format(format_options), + @"…the power to split the world between those who embraced…" + ); - // let builder = MatcherBuilder::new_test( - // &rtxn, - // &temp_index, - // "\"groundbreaking invention\" \"had the power to split the world between those\"", - // ); - // let mut matcher = builder.build(text, None); - // insta::assert_snapshot!( - // matcher.format(format_options), - // @"…invention had the power to split the world between those…" - // ); + let builder = MatcherBuilder::new_test( + &rtxn, + &temp_index, + "\"groundbreaking invention\" \"split the world between\"", + ); + let mut matcher = builder.build(text, None); + insta::assert_snapshot!( + matcher.format(format_options), + 
@"…groundbreaking invention had the power to split the world between…" + ); - // let builder = MatcherBuilder::new_test( - // &rtxn, - // &temp_index, - // "\"The groundbreaking invention\" \"had the power to split the world between those\"", - // ); - // let mut matcher = builder.build(text, None); - // insta::assert_snapshot!( - // matcher.format(format_options), - // @"…invention had the power to split the world between those…" - // ); + let builder = MatcherBuilder::new_test( + &rtxn, + &temp_index, + "\"groundbreaking invention\" \"had the power to split the world between those\"", + ); + let mut matcher = builder.build(text, None); + insta::assert_snapshot!( + matcher.format(format_options), + @"…invention had the power to split the world between those…" + ); + + let builder = MatcherBuilder::new_test( + &rtxn, + &temp_index, + "\"The groundbreaking invention\" \"had the power to split the world between those\"", + ); + let mut matcher = builder.build(text, None); + insta::assert_snapshot!( + matcher.format(format_options), + @"…invention had the power to split the world between those…" + ); } #[test] From 6d16230f17eb000407adb21dc2f3e9fa49767cc8 Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Tue, 1 Oct 2024 17:19:15 +0300 Subject: [PATCH 30/92] Refactor --- milli/src/search/new/matches/mod.rs | 327 ++++++++++++++-------------- 1 file changed, 158 insertions(+), 169 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 624287f5f..804b59553 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -181,6 +181,149 @@ impl SimpleTokenKind { } } +#[derive(PartialEq, PartialOrd)] +struct MatchIntervalScore(i16, i16, i16); + +impl MatchIntervalScore { + /// Compute the score of a match interval: + /// 1) count unique matches + /// 2) calculate distance between matches + /// 3) count ordered matches + fn new(matches: &[Match]) -> Self { + let mut ids: Vec = Vec::with_capacity(matches.len()); + let mut order_score = 0; + let mut distance_score = 0; + + // count score for phrases + fn tally_phrase_scores( + fwp: &usize, + lwp: &usize, + order_score: &mut i16, + distance_score: &mut i16, + ) { + let words_in_phrase_minus_one = (lwp - fwp) as i16; + // will always be ordered, so +1 for each space between words + *order_score += words_in_phrase_minus_one; + // distance will always be 1, so -1 for each space between words + *distance_score -= words_in_phrase_minus_one; + } + + let mut iter = matches.iter().peekable(); + while let Some(m) = iter.next() { + if let Some(next_match) = iter.peek() { + // if matches are ordered + if next_match.ids.iter().min() > m.ids.iter().min() { + order_score += 1; + } + + let m_last_word_pos = match m.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: (fwp, lwp), .. 
} => { + tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); + lwp + } + }; + let next_match_first_word_pos = next_match.get_first_word_pos(); + + // compute distance between matches + distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16; + } else if let MatchPosition::Phrase { word_positions: (fwp, lwp), .. } = m.position { + // in case last match is a phrase, count score for its words + tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); + } + + ids.extend(m.ids.iter()); + } + + ids.sort_unstable(); + ids.dedup(); + let uniq_score = ids.len() as i16; + + // rank by unique match count, then by distance between matches, then by ordered match count. + Self(uniq_score, distance_score, order_score) + } +} + +struct MatchIntervalWithScore { + interval: (usize, usize), + score: MatchIntervalScore, +} + +impl MatchIntervalWithScore { + /// Returns the matches interval where the score computed by match_interval_score is the best. + fn find_best_match_interval(matches: &[Match], crop_size: usize) -> &[Match] { + let matches_len = matches.len(); + if matches_len <= 1 { + return matches; + } + + // positions of the first and the last match of the best matches interval in `matches`. + let mut best_interval: Option = None; + let mut save_best_interval = |interval_first, interval_last, interval_score| { + let is_interval_score_better = + &best_interval.as_ref().map_or(true, |Self { score, .. }| interval_score > *score); + if *is_interval_score_better { + best_interval = + Some(Self { interval: (interval_first, interval_last), score: interval_score }); + } + }; + + // we compute the matches interval if we have at least 2 matches. + // current interval positions. 
+ let mut interval_first = 0; + let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); + + for (index, next_match) in matches.iter().enumerate() { + // if next match would make interval gross more than crop_size, + // we compare the current interval with the best one, + // then we increase `interval_first` until next match can be added. + let next_match_last_word_pos = next_match.get_last_word_pos(); + + // if the next match would mean that we pass the crop size window, + // we take the last valid match, that didn't pass this boundry, which is `index` - 1, + // and calculate a score for it, and check if it's better than our best so far + if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size { + // if index is 0 there is no last viable match + if index != 0 { + let interval_last = index - 1; + let interval_score = + MatchIntervalScore::new(&matches[interval_first..=interval_last]); + + // keep interval if it's the best + save_best_interval(interval_first, interval_last, interval_score); + } + + // advance start of the interval while interval is longer than crop_size. + loop { + interval_first += 1; + interval_first_match_first_word_pos = + matches[interval_first].get_first_word_pos(); + + if interval_first_match_first_word_pos > next_match_last_word_pos + || next_match_last_word_pos - interval_first_match_first_word_pos + < crop_size + { + break; + } + } + } + } + + // compute the last interval score and compare it to the best one. 
+ let interval_last = matches_len - 1; + // if it's the last match with itself, we need to make sure it's + // not a phrase longer than the crop window + if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size { + let interval_score = MatchIntervalScore::new(&matches[interval_first..=interval_last]); + save_best_interval(interval_first, interval_last, interval_score); + } + + // if none of the matches fit the criteria above, default to the first one + let best_interval = best_interval.map_or((0, 0), |v| v.interval); + &matches[best_interval.0..=best_interval.1] + } +} + /// Structure used to analyze a string, compute words that match, /// and format the source string, returning a highlighted and cropped sub-string. pub struct Matcher<'t, 'tokenizer, 'b, 'lang> { @@ -415,14 +558,16 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { (crop_byte_start, crop_byte_end) } else { - // there's one match? and it's longer than the crop window, so we have to advance inward + // there's one match and it's longer than the crop window, so we have to advance inward let mut remaining_extra_words = matches_window_len - crop_size; let mut tokens_from_end = tokens[..=last_match_last_token_position].iter().rev().peekable(); while remaining_extra_words > 0 { - let token_from_end_kind = - tokens_from_end.peek().map(SimpleTokenKind::get).expect("TODO"); + let token_from_end_kind = tokens_from_end + .peek() + .map(SimpleTokenKind::get) + .expect("Expected iterator to not reach end"); if token_from_end_kind.is_not_separator() { remaining_extra_words -= 1; } @@ -435,165 +580,15 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { } else { &0 }; - let crop_byte_end = tokens_from_end.next().map(|t| t.byte_start).expect("TODO"); + let crop_byte_end = tokens_from_end + .next() + .map(|t| t.byte_start) + .expect("Expected iterator to not reach end"); (*crop_byte_start, crop_byte_end) } } - /// Compute the score of a match interval: - /// 1) count 
unique matches - /// 2) calculate distance between matches - /// 3) count ordered matches - fn match_interval_score(&self, matches: &[Match]) -> (i16, i16, i16) { - let mut ids: Vec = Vec::with_capacity(matches.len()); - let mut order_score = 0; - let mut distance_score = 0; - - // count score for phrases - fn tally_phrase_scores( - fwp: &usize, - lwp: &usize, - order_score: &mut i16, - distance_score: &mut i16, - ) { - let words_in_phrase_minus_one = (lwp - fwp) as i16; - // will always be ordered, so +1 for each space between words - *order_score += words_in_phrase_minus_one; - // distance will always be 1, so -1 for each space between words - *distance_score -= words_in_phrase_minus_one; - } - - let mut iter = matches.iter().peekable(); - while let Some(m) = iter.next() { - if let Some(next_match) = iter.peek() { - // if matches are ordered - if next_match.ids.iter().min() > m.ids.iter().min() { - order_score += 1; - } - - let m_last_word_pos = match m.position { - MatchPosition::Word { word_position, .. } => word_position, - MatchPosition::Phrase { word_positions: (fwp, lwp), .. } => { - tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); - lwp - } - }; - let next_match_first_word_pos = next_match.get_first_word_pos(); - - // compute distance between matches - distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16; - } else if let MatchPosition::Phrase { word_positions: (fwp, lwp), .. } = m.position { - // in case last match is a phrase, count score for its words - tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); - } - - ids.extend(m.ids.iter()); - } - - ids.sort_unstable(); - ids.dedup(); - let uniq_score = ids.len() as i16; - - // rank by unique match count, then by distance between matches, then by ordered match count. - (uniq_score, distance_score, order_score) - } - - /// Returns the matches interval where the score computed by match_interval_score is the best. 
- fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] { - let matches_len = matches.len(); - if matches_len <= 1 { - return matches; - } - - // positions of the first and the last match of the best matches interval in `matches`. - struct BestInterval { - interval: (usize, usize), - score: (i16, i16, i16), - } - - fn save_best_interval( - best_interval: &mut Option, - interval_first: usize, - interval_last: usize, - interval_score: (i16, i16, i16), - ) { - if let Some(best_interval) = best_interval { - if interval_score > best_interval.score { - best_interval.interval = (interval_first, interval_last); - best_interval.score = interval_score; - } - } else { - *best_interval = Some(BestInterval { - interval: (interval_first, interval_last), - score: interval_score, - }); - } - } - - let mut best_interval: Option = None; - - // we compute the matches interval if we have at least 2 matches. - // current interval positions. - let mut interval_first = 0; - let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); - - for (index, next_match) in matches.iter().enumerate() { - // if next match would make interval gross more than crop_size, - // we compare the current interval with the best one, - // then we increase `interval_first` until next match can be added. 
- let next_match_last_word_pos = next_match.get_last_word_pos(); - - // if the next match would mean that we pass the crop size window, - // we take the last valid match, that didn't pass this boundry, which is `index` - 1, - // and calculate a score for it, and check if it's better than our best so far - if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size { - // if index is 0 there is no last viable match - if index != 0 { - let interval_last = index - 1; - let interval_score = - self.match_interval_score(&matches[interval_first..=interval_last]); - - // keep interval if it's the best - save_best_interval( - &mut best_interval, - interval_first, - interval_last, - interval_score, - ); - } - - // advance start of the interval while interval is longer than crop_size. - loop { - interval_first += 1; - interval_first_match_first_word_pos = - matches[interval_first].get_first_word_pos(); - - if interval_first_match_first_word_pos > next_match_last_word_pos - || next_match_last_word_pos - interval_first_match_first_word_pos - < crop_size - { - break; - } - } - } - } - - // compute the last interval score and compare it to the best one. - let interval_last = matches_len - 1; - // if it's the last match with itself, we need to make sure it's - // not a phrase longer than the crop window - if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size { - let interval_score = - self.match_interval_score(&matches[interval_first..=interval_last]); - save_best_interval(&mut best_interval, interval_first, interval_last, interval_score); - } - - // if none of the matches fit the criteria above, default to the first one - let best_interval = best_interval.map_or((0, 0), |v| v.interval); - &matches[best_interval.0..=best_interval.1] - } - // Returns the formatted version of the original text. 
pub fn format(&mut self, format_options: FormatOptions) -> Cow<'t, str> { if !format_options.highlight && format_options.crop.is_none() { @@ -606,7 +601,9 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // crop around the best interval. let (byte_start, byte_end) = match format_options.crop { Some(crop_size) if crop_size > 0 => { - let matches = self.find_best_match_interval(matches, crop_size); + let matches = MatchIntervalWithScore::find_best_match_interval( + matches, crop_size, + ); self.crop_bounds(tokens, matches, crop_size) } _ => (0, self.text.len()), @@ -1046,6 +1043,7 @@ mod tests { let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), + // @TODO: Should probably highlight it all, even if it didn't fit the whole phrase @"The groundbreaking invention had the power to split the world…" ); @@ -1057,6 +1055,7 @@ mod tests { let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), + // @TODO: Should probably include end of string in this case? @"…between those who embraced progress and those who resisted change…" ); @@ -1090,17 +1089,7 @@ mod tests { let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), - @"…invention had the power to split the world between those…" - ); - - let builder = MatcherBuilder::new_test( - &rtxn, - &temp_index, - "\"The groundbreaking invention\" \"had the power to split the world between those\"", - ); - let mut matcher = builder.build(text, None); - insta::assert_snapshot!( - matcher.format(format_options), + // @TODO: "invention" should be highlighted as well @"…invention had the power to split the world between those…" ); } From d9e4db9983e7017bb13a89f7e28def43069e1a58 Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Tue, 1 Oct 2024 17:50:59 +0300 Subject: [PATCH 31/92] Refactor --- milli/src/search/new/matches/mod.rs | 38 ++++++++++++----------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 804b59553..1552de8aa 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -166,19 +166,12 @@ enum SimpleTokenKind { } impl SimpleTokenKind { - fn get(token: &&Token<'_>) -> Self { + fn new(token: &&Token<'_>) -> Self { match token.kind { TokenKind::Separator(separaor_kind) => Self::Separator(separaor_kind), _ => Self::NotSeparator, } } - - fn is_not_separator(&self) -> bool { - match self { - SimpleTokenKind::NotSeparator => true, - SimpleTokenKind::Separator(_) => false, - } - } } #[derive(PartialEq, PartialOrd)] @@ -259,9 +252,12 @@ impl MatchIntervalWithScore { // positions of the first and the last match of the best matches interval in `matches`. let mut best_interval: Option = None; - let mut save_best_interval = |interval_first, interval_last, interval_score| { + + let mut save_best_interval = |interval_first, interval_last| { + let interval_score = MatchIntervalScore::new(&matches[interval_first..=interval_last]); let is_interval_score_better = &best_interval.as_ref().map_or(true, |Self { score, .. 
}| interval_score > *score); + if *is_interval_score_better { best_interval = Some(Self { interval: (interval_first, interval_last), score: interval_score }); @@ -286,11 +282,8 @@ impl MatchIntervalWithScore { // if index is 0 there is no last viable match if index != 0 { let interval_last = index - 1; - let interval_score = - MatchIntervalScore::new(&matches[interval_first..=interval_last]); - // keep interval if it's the best - save_best_interval(interval_first, interval_last, interval_score); + save_best_interval(interval_first, interval_last); } // advance start of the interval while interval is longer than crop_size. @@ -314,8 +307,7 @@ impl MatchIntervalWithScore { // if it's the last match with itself, we need to make sure it's // not a phrase longer than the crop window if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size { - let interval_score = MatchIntervalScore::new(&matches[interval_first..=interval_last]); - save_best_interval(interval_first, interval_last, interval_score); + save_best_interval(interval_first, interval_last); } // if none of the matches fit the criteria above, default to the first one @@ -359,6 +351,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { Some(MatchType::Full { ids, .. }) => { // save the token that closes the partial match as a match. matches.push(Match { + // @TODO: Shouldn't this be +1? 
match_len: word.char_end - *first_word_char_start, ids: ids.clone().collect(), position: MatchPosition::Phrase { @@ -484,8 +477,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // grows the crop window peeking in both directions // until the window contains the good number of words: while remaining_words > 0 { - let before_token_kind = before_tokens.peek().map(SimpleTokenKind::get); - let after_token_kind = after_tokens.peek().map(SimpleTokenKind::get); + let before_token_kind = before_tokens.peek().map(SimpleTokenKind::new); + let after_token_kind = after_tokens.peek().map(SimpleTokenKind::new); match (before_token_kind, after_token_kind) { // we can expand both sides. @@ -504,7 +497,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { if remaining_words > 1 { after_tokens.next(); } - } else if let SeparatorKind::Hard = before_token_separator_kind { + } else if matches!(before_token_separator_kind, SeparatorKind::Hard) + { after_tokens.next(); } else { before_tokens.next(); @@ -536,14 +530,14 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // the end of the text is reached, advance left. (Some(before_token_kind), None) => { before_tokens.next(); - if let SimpleTokenKind::NotSeparator = before_token_kind { + if matches!(before_token_kind, SimpleTokenKind::NotSeparator) { remaining_words -= 1; } } // the start of the text is reached, advance right. 
(None, Some(after_token_kind)) => { after_tokens.next(); - if let SimpleTokenKind::NotSeparator = after_token_kind { + if matches!(after_token_kind, SimpleTokenKind::NotSeparator) { remaining_words -= 1; } } @@ -566,9 +560,9 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { while remaining_extra_words > 0 { let token_from_end_kind = tokens_from_end .peek() - .map(SimpleTokenKind::get) + .map(SimpleTokenKind::new) .expect("Expected iterator to not reach end"); - if token_from_end_kind.is_not_separator() { + if matches!(token_from_end_kind, SimpleTokenKind::NotSeparator) { remaining_extra_words -= 1; } From 4b598fa648944a5f5f1cdd7ecbdadd1cb8d3d659 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 30 Sep 2024 13:12:01 +0200 Subject: [PATCH 32/92] update arroy --- Cargo.lock | 5 +++-- index-scheduler/Cargo.toml | 2 +- milli/Cargo.toml | 2 +- milli/src/error.rs | 1 + milli/src/vector/mod.rs | 33 +++++++++++++++++++++------------ 5 files changed, 27 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3237d4e16..c85a59952 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -386,8 +386,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arroy" -version = "0.4.0" -source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc5f272f38fa063bbff0a7ab5219404e221493de005e2b4078c62d626ef567e" dependencies = [ "bytemuck", "byteorder", diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 432a86382..e80311005 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -40,7 +40,7 @@ ureq = "2.10.0" uuid = { version = "1.10.0", features = ["serde", "v4"] } [dev-dependencies] -arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } +arroy = "0.5.0" 
big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.39.0", features = ["json", "redactions"] } diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 01384f496..df0e59496 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -80,7 +80,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", tiktoken-rs = "0.5.9" liquid = "0.26.6" rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } -arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } +arroy = "0.5.0" rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.10.0", features = ["json"] } diff --git a/milli/src/error.rs b/milli/src/error.rs index 400d3d3be..840db7606 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -297,6 +297,7 @@ impl From for Error { arroy::Error::InvalidVecDimension { expected, received } => { Error::UserError(UserError::InvalidVectorDimensions { expected, found: received }) } + arroy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation), arroy::Error::DatabaseFull | arroy::Error::InvalidItemAppend | arroy::Error::UnmatchingDistance { .. } diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index b6d6510af..097e93ad2 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::sync::Arc; -use arroy::distances::{Angular, BinaryQuantizedAngular}; +use arroy::distances::{BinaryQuantizedCosine, Cosine}; use arroy::ItemId; use deserr::{DeserializeError, Deserr}; use heed::{RoTxn, RwTxn, Unspecified}; @@ -87,7 +87,7 @@ impl ArroyWrapper { if self.quantized { let writer = arroy::Writer::new(self.quantized_db(), index, dimension); if writer.need_build(wtxn)? { - writer.build(wtxn, rng, None)? + writer.builder(rng).build(wtxn)? } else if writer.is_empty(wtxn)? 
{ break; } @@ -99,11 +99,10 @@ impl ArroyWrapper { // only happens once in the life of an embedder, it's not very performances // sensitive. if quantizing && !self.quantized { - let writer = - writer.prepare_changing_distance::(wtxn)?; - writer.build(wtxn, rng, None)? + let writer = writer.prepare_changing_distance::(wtxn)?; + writer.builder(rng).build(wtxn)?; } else if writer.need_build(wtxn)? { - writer.build(wtxn, rng, None)? + writer.builder(rng).build(wtxn)?; } else if writer.is_empty(wtxn)? { break; } @@ -323,8 +322,13 @@ impl ArroyWrapper { let mut results = Vec::new(); for reader in self.readers(rtxn, db) { - let ret = reader?.nns_by_item(rtxn, item, limit, None, None, filter)?; - if let Some(mut ret) = ret { + let reader = reader?; + let mut searcher = reader.nns(limit); + if let Some(filter) = filter { + searcher.candidates(filter); + } + + if let Some(mut ret) = searcher.by_item(rtxn, item)? { results.append(&mut ret); } else { break; @@ -359,8 +363,13 @@ impl ArroyWrapper { let mut results = Vec::new(); for reader in self.readers(rtxn, db) { - let mut ret = reader?.nns_by_vector(rtxn, vector, limit, None, None, filter)?; - results.append(&mut ret); + let reader = reader?; + let mut searcher = reader.nns(limit); + if let Some(filter) = filter { + searcher.candidates(filter); + } + + results.append(&mut searcher.by_vector(rtxn, vector)?); } results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); @@ -391,11 +400,11 @@ impl ArroyWrapper { Ok(vectors) } - fn angular_db(&self) -> arroy::Database { + fn angular_db(&self) -> arroy::Database { self.database.remap_data_type() } - fn quantized_db(&self) -> arroy::Database { + fn quantized_db(&self) -> arroy::Database { self.database.remap_data_type() } } From b1dc10e771a757826fe400280c8bac84976ce95b Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 1 Oct 2024 17:45:49 +0200 Subject: [PATCH 33/92] uses the new cancellation method in arroy --- milli/src/update/index_documents/mod.rs | 3 ++- 
milli/src/vector/mod.rs | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index e164a0817..88d20fff0 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -699,6 +699,7 @@ where for (embedder_name, dimension) in dimension { let wtxn = &mut *self.wtxn; let vector_arroy = self.index.vector_arroy; + let cancel = &self.should_abort; let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, @@ -713,7 +714,7 @@ where pool.install(|| { let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized); - writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing)?; + writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing, cancel)?; Result::Ok(()) }) .map_err(InternalError::from)??; diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 097e93ad2..571c02c8c 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -82,6 +82,7 @@ impl ArroyWrapper { rng: &mut R, dimension: usize, quantizing: bool, + cancel: &(impl Fn() -> bool + Sync + Send), ) -> Result<(), arroy::Error> { for index in arroy_db_range_for_embedder(self.embedder_index) { if self.quantized { @@ -100,9 +101,9 @@ impl ArroyWrapper { // sensitive. if quantizing && !self.quantized { let writer = writer.prepare_changing_distance::(wtxn)?; - writer.builder(rng).build(wtxn)?; + writer.builder(rng).cancel(cancel).build(wtxn)?; } else if writer.need_build(wtxn)? { - writer.builder(rng).build(wtxn)?; + writer.builder(rng).cancel(cancel).build(wtxn)?; } else if writer.is_empty(wtxn)? { break; } From 37a9d64c4441bb6a4a199ad018ab4ddb44d4d958 Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Tue, 1 Oct 2024 22:52:01 +0300 Subject: [PATCH 34/92] Fix failing test, refactor --- milli/src/search/new/matches/mod.rs | 44 ++++++++++++++++++----------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 1552de8aa..ae1264482 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -245,8 +245,7 @@ struct MatchIntervalWithScore { impl MatchIntervalWithScore { /// Returns the matches interval where the score computed by match_interval_score is the best. fn find_best_match_interval(matches: &[Match], crop_size: usize) -> &[Match] { - let matches_len = matches.len(); - if matches_len <= 1 { + if matches.len() <= 1 { return matches; } @@ -303,7 +302,7 @@ impl MatchIntervalWithScore { } // compute the last interval score and compare it to the best one. - let interval_last = matches_len - 1; + let interval_last = matches.len() - 1; // if it's the last match with itself, we need to make sure it's // not a phrase longer than the crop window if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size { @@ -451,28 +450,39 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { crop_size: usize, ) -> (usize, usize) { // if there is no match, we start from the beginning of the string by default. 
- let first_match_first_word_position = - matches.first().map(|m| m.get_first_word_pos()).unwrap_or(0); - let first_match_first_token_position = - matches.first().map(|m| m.get_first_token_pos()).unwrap_or(0); - let last_match_last_word_position = - matches.last().map(|m| m.get_last_word_pos()).unwrap_or(0); - let last_match_last_token_position = - matches.last().map(|m| m.get_last_token_pos()).unwrap_or(0); + let (matches_size, first_match_first_token_position, last_match_last_token_position) = + if !matches.is_empty() { + let matches_first = matches.first().unwrap(); + let matches_last = matches.last().unwrap(); - let matches_window_len = - last_match_last_word_position - first_match_first_word_position + 1; + ( + matches_last.get_last_word_pos() - matches_first.get_first_word_pos() + 1, + matches_first.get_first_token_pos(), + matches_last.get_last_token_pos(), + ) + } else { + (0, 0, 0) + }; - if crop_size >= matches_window_len { + if crop_size >= matches_size { // matches needs to be counted in the crop len. - let mut remaining_words = crop_size - matches_window_len; + let mut remaining_words = crop_size - matches_size; + + let last_match_last_token_position_plus_one = last_match_last_token_position + 1; + let after_tokens_starting_index = if matches_size == 0 { + 0 + } else if last_match_last_token_position_plus_one < tokens.len() { + last_match_last_token_position_plus_one + } else { + tokens.len() + }; // create the initial state of the crop window: 2 iterators starting from the matches positions, // a reverse iterator starting from the first match token position and going towards the beginning of the text, let mut before_tokens = tokens[..first_match_first_token_position].iter().rev().peekable(); // an iterator starting from the last match token position and going towards the end of the text. 
- let mut after_tokens = tokens[last_match_last_token_position + 1..].iter().peekable(); + let mut after_tokens = tokens[after_tokens_starting_index..].iter().peekable(); // grows the crop window peeking in both directions // until the window contains the good number of words: @@ -553,7 +563,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { (crop_byte_start, crop_byte_end) } else { // there's one match and it's longer than the crop window, so we have to advance inward - let mut remaining_extra_words = matches_window_len - crop_size; + let mut remaining_extra_words = matches_size - crop_size; let mut tokens_from_end = tokens[..=last_match_last_token_position].iter().rev().peekable(); From 62dfbd6255846db8fcfb7c515a9ad041999f7d3a Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 2 Oct 2024 11:20:02 +0200 Subject: [PATCH 35/92] Add binary quantized to allowed fields for source adds its sources --- milli/src/vector/settings.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 3bb7f09e6..d1cf364a2 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -417,6 +417,8 @@ impl EmbeddingSettings { pub const DISTRIBUTION: &'static str = "distribution"; + pub const BINARY_QUANTIZED: &'static str = "binaryQuantized"; + pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] { match field { Self::SOURCE => &[ @@ -456,6 +458,13 @@ impl EmbeddingSettings { EmbedderSource::Rest, EmbedderSource::UserProvided, ], + Self::BINARY_QUANTIZED => &[ + EmbedderSource::HuggingFace, + EmbedderSource::Ollama, + EmbedderSource::OpenAi, + EmbedderSource::Rest, + EmbedderSource::UserProvided, + ], _other => unreachable!("unknown field"), } } @@ -470,6 +479,7 @@ impl EmbeddingSettings { Self::DIMENSIONS, Self::DISTRIBUTION, Self::URL, + Self::BINARY_QUANTIZED, ], EmbedderSource::HuggingFace => &[ Self::SOURCE, @@ -477,6 +487,7 @@ 
impl EmbeddingSettings { Self::REVISION, Self::DOCUMENT_TEMPLATE, Self::DISTRIBUTION, + Self::BINARY_QUANTIZED, ], EmbedderSource::Ollama => &[ Self::SOURCE, @@ -486,8 +497,11 @@ impl EmbeddingSettings { Self::API_KEY, Self::DIMENSIONS, Self::DISTRIBUTION, + Self::BINARY_QUANTIZED, ], - EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION], + EmbedderSource::UserProvided => { + &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION, Self::BINARY_QUANTIZED] + } EmbedderSource::Rest => &[ Self::SOURCE, Self::API_KEY, @@ -498,6 +512,7 @@ impl EmbeddingSettings { Self::RESPONSE, Self::HEADERS, Self::DISTRIBUTION, + Self::BINARY_QUANTIZED, ], } } From 0c2661ea90f26d3269d0ed53cb47fa69bf9e5600 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 2 Oct 2024 11:20:29 +0200 Subject: [PATCH 36/92] Fix tests --- meilisearch/tests/vector/settings.rs | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/meilisearch/tests/vector/settings.rs b/meilisearch/tests/vector/settings.rs index 4f07ca18b..ed45913a8 100644 --- a/meilisearch/tests/vector/settings.rs +++ b/meilisearch/tests/vector/settings.rs @@ -4,6 +4,53 @@ use crate::common::{GetAllDocumentsOptions, Server}; use crate::json; use crate::vector::generate_default_user_provided_documents; +#[actix_rt::test] +async fn field_unavailable_for_source() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { "manual": {"source": "userProvided", "documentTemplate": "{{doc.documentTemplate}}"}}, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": 
"`.embedders.manual`: Field `documentTemplate` unavailable for source `userProvided` (only available for sources: `huggingFace`, `openAi`, `ollama`, `rest`). Available fields: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { "default": {"source": "openAi", "revision": "42"}}, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `dimensions`, `distribution`, `url`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); +} + #[actix_rt::test] async fn update_embedder() { let server = Server::new().await; From 40336ce87d46b43123d03cb343b4d3f785001a9c Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Thu, 3 Oct 2024 10:40:14 +0300 Subject: [PATCH 37/92] Fix and refactor crop_bounds --- milli/src/search/new/matches/mod.rs | 231 ++++++++++++++-------------- 1 file changed, 113 insertions(+), 118 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index ae1264482..f8d60ef54 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use charabia::{Language, SeparatorKind, Token, TokenKind, Tokenizer}; +use either::Either; pub use matching_words::MatchingWords; use matching_words::{MatchType, PartialMatch, WordId}; use serde::Serialize; @@ -450,147 +451,141 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { crop_size: usize, ) -> (usize, usize) { // if there is no match, we start from the beginning of the string by default. - let (matches_size, first_match_first_token_position, last_match_last_token_position) = - if !matches.is_empty() { - let matches_first = matches.first().unwrap(); - let matches_last = matches.last().unwrap(); + let ( + mut remaining_words, + is_iterating_forward, + before_tokens_starting_index, + after_tokens_starting_index, + ) = if !matches.is_empty() { + let matches_first = matches.first().unwrap(); + let matches_last = matches.last().unwrap(); - ( - matches_last.get_last_word_pos() - matches_first.get_first_word_pos() + 1, - matches_first.get_first_token_pos(), - matches_last.get_last_token_pos(), - ) + let matches_size = + matches_last.get_last_word_pos() - matches_first.get_first_word_pos() + 1; + + let is_crop_size_gte_match_size = crop_size >= matches_size; + let is_iterating_forward = matches_size == 0 || is_crop_size_gte_match_size; + + let remaining_words = if is_crop_size_gte_match_size { + crop_size - matches_size } else { - (0, 0, 0) + // in case matches size is greater than crop size, which implies there's only one match, + // we count words backwards, 
because we have to remove words, as they're extra words outside of + // crop window + matches_size - crop_size }; - if crop_size >= matches_size { - // matches needs to be counted in the crop len. - let mut remaining_words = crop_size - matches_size; - - let last_match_last_token_position_plus_one = last_match_last_token_position + 1; let after_tokens_starting_index = if matches_size == 0 { 0 - } else if last_match_last_token_position_plus_one < tokens.len() { - last_match_last_token_position_plus_one } else { - tokens.len() + let last_match_last_token_position_plus_one = matches_last.get_last_token_pos() + 1; + if last_match_last_token_position_plus_one < tokens.len() { + last_match_last_token_position_plus_one + } else { + // we have matched the end of possible tokens, there's nothing to advance + tokens.len() - 1 + } }; - // create the initial state of the crop window: 2 iterators starting from the matches positions, - // a reverse iterator starting from the first match token position and going towards the beginning of the text, - let mut before_tokens = - tokens[..first_match_first_token_position].iter().rev().peekable(); - // an iterator starting from the last match token position and going towards the end of the text. 
- let mut after_tokens = tokens[after_tokens_starting_index..].iter().peekable(); + ( + remaining_words, + is_iterating_forward, + if is_iterating_forward { matches_first.get_first_token_pos() } else { 0 }, + after_tokens_starting_index, + ) + } else { + (crop_size, true, 0, 0) + }; - // grows the crop window peeking in both directions - // until the window contains the good number of words: - while remaining_words > 0 { - let before_token_kind = before_tokens.peek().map(SimpleTokenKind::new); - let after_token_kind = after_tokens.peek().map(SimpleTokenKind::new); + // create the initial state of the crop window: 2 iterators starting from the matches positions, + // a reverse iterator starting from the first match token position and going towards the beginning of the text, + let mut before_tokens = tokens[..before_tokens_starting_index].iter().rev().peekable(); + // an iterator ... + let mut after_tokens = if is_iterating_forward { + // ... starting from the last match token position and going towards the end of the text. + Either::Left(tokens[after_tokens_starting_index..].iter().peekable()) + } else { + // ... starting from the last match token position and going towards the start of the text. + Either::Right(tokens[..=after_tokens_starting_index].iter().rev().peekable()) + }; - match (before_token_kind, after_token_kind) { - // we can expand both sides. - (Some(before_token_kind), Some(after_token_kind)) => { - match (before_token_kind, after_token_kind) { - // if they are both separators and are the same kind then advance both, - // or expand in the soft separator separator side. 
- ( - SimpleTokenKind::Separator(before_token_separator_kind), - SimpleTokenKind::Separator(after_token_separator_kind), - ) => { - if before_token_separator_kind == after_token_separator_kind { - before_tokens.next(); + // grows the crop window peeking in both directions + // until the window contains the good number of words: + while remaining_words > 0 { + let before_token_kind = before_tokens.peek().map(SimpleTokenKind::new); + let after_token_kind = + after_tokens.as_mut().either(|v| v.peek(), |v| v.peek()).map(SimpleTokenKind::new); - // this avoid having an ending separator before crop marker. - if remaining_words > 1 { - after_tokens.next(); - } - } else if matches!(before_token_separator_kind, SeparatorKind::Hard) - { - after_tokens.next(); - } else { - before_tokens.next(); - } - } - // if one of the tokens is a word, we expend in the side of the word. - // left is a word, advance left. - (SimpleTokenKind::NotSeparator, SimpleTokenKind::Separator(_)) => { + match (before_token_kind, after_token_kind) { + // we can expand both sides. + (Some(before_token_kind), Some(after_token_kind)) => { + match (before_token_kind, after_token_kind) { + // if they are both separators and are the same kind then advance both, + // or expand in the soft separator separator side. + ( + SimpleTokenKind::Separator(before_token_separator_kind), + SimpleTokenKind::Separator(after_token_separator_kind), + ) => { + if before_token_separator_kind == after_token_separator_kind { + before_tokens.next(); + + // this avoid having an ending separator before crop marker. + if remaining_words > 1 { + after_tokens.next(); + } + } else if matches!(before_token_separator_kind, SeparatorKind::Hard) { + after_tokens.next(); + } else { before_tokens.next(); - remaining_words -= 1; } - // right is a word, advance right. - (SimpleTokenKind::Separator(_), SimpleTokenKind::NotSeparator) => { + } + // if one of the tokens is a word, we expend in the side of the word. 
+ // left is a word, advance left. + (SimpleTokenKind::NotSeparator, SimpleTokenKind::Separator(_)) => { + before_tokens.next(); + remaining_words -= 1; + } + // right is a word, advance right. + (SimpleTokenKind::Separator(_), SimpleTokenKind::NotSeparator) => { + after_tokens.next(); + remaining_words -= 1; + } + // both are words, advance left then right if remaining_word > 0. + (SimpleTokenKind::NotSeparator, SimpleTokenKind::NotSeparator) => { + before_tokens.next(); + remaining_words -= 1; + + if remaining_words > 0 { after_tokens.next(); remaining_words -= 1; } - // both are words, advance left then right if remaining_word > 0. - (SimpleTokenKind::NotSeparator, SimpleTokenKind::NotSeparator) => { - before_tokens.next(); - remaining_words -= 1; - - if remaining_words > 0 { - after_tokens.next(); - remaining_words -= 1; - } - } } } - // the end of the text is reached, advance left. - (Some(before_token_kind), None) => { - before_tokens.next(); - if matches!(before_token_kind, SimpleTokenKind::NotSeparator) { - remaining_words -= 1; - } - } - // the start of the text is reached, advance right. - (None, Some(after_token_kind)) => { - after_tokens.next(); - if matches!(after_token_kind, SimpleTokenKind::NotSeparator) { - remaining_words -= 1; - } - } - // no more token to add. - (None, None) => break, } - } - - // finally, keep the byte index of each bound of the crop window. 
- let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end); - let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start); - - (crop_byte_start, crop_byte_end) - } else { - // there's one match and it's longer than the crop window, so we have to advance inward - let mut remaining_extra_words = matches_size - crop_size; - let mut tokens_from_end = - tokens[..=last_match_last_token_position].iter().rev().peekable(); - - while remaining_extra_words > 0 { - let token_from_end_kind = tokens_from_end - .peek() - .map(SimpleTokenKind::new) - .expect("Expected iterator to not reach end"); - if matches!(token_from_end_kind, SimpleTokenKind::NotSeparator) { - remaining_extra_words -= 1; + // the end of the text is reached, advance left. + (Some(before_token_kind), None) => { + before_tokens.next(); + if matches!(before_token_kind, SimpleTokenKind::NotSeparator) { + remaining_words -= 1; + } } - - tokens_from_end.next(); + // the start of the text is reached, advance right. + (None, Some(after_token_kind)) => { + after_tokens.next(); + if matches!(after_token_kind, SimpleTokenKind::NotSeparator) { + remaining_words -= 1; + } + } + // no more token to add. + (None, None) => break, } - - let crop_byte_start = if first_match_first_token_position > 0 { - &tokens[first_match_first_token_position - 1].byte_end - } else { - &0 - }; - let crop_byte_end = tokens_from_end - .next() - .map(|t| t.byte_start) - .expect("Expected iterator to not reach end"); - - (*crop_byte_start, crop_byte_end) } + + // finally, keep the byte index of each bound of the crop window. + let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end); + let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start); + + (crop_byte_start, crop_byte_end) } // Returns the formatted version of the original text. From 8221c94e7f5666c73944cc5f57211a0eb4035b59 Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Thu, 3 Oct 2024 15:37:51 +0300 Subject: [PATCH 38/92] Split into multiple files, refactor --- .../search/new/matches/best_match_interval.rs | 139 ++++++++++ milli/src/search/new/matches/match.rs | 62 +++++ milli/src/search/new/matches/mod.rs | 244 +----------------- .../search/new/matches/simple_token_kind.rs | 15 ++ 4 files changed, 230 insertions(+), 230 deletions(-) create mode 100644 milli/src/search/new/matches/best_match_interval.rs create mode 100644 milli/src/search/new/matches/match.rs create mode 100644 milli/src/search/new/matches/simple_token_kind.rs diff --git a/milli/src/search/new/matches/best_match_interval.rs b/milli/src/search/new/matches/best_match_interval.rs new file mode 100644 index 000000000..a6497f351 --- /dev/null +++ b/milli/src/search/new/matches/best_match_interval.rs @@ -0,0 +1,139 @@ +use super::matching_words::WordId; +use super::{Match, MatchPosition}; + +struct MatchIntervalWithScore { + interval: [usize; 2], + score: [i16; 3], +} + +// count score for phrases +fn tally_phrase_scores(fwp: &usize, lwp: &usize, order_score: &mut i16, distance_score: &mut i16) { + let words_in_phrase_minus_one = (lwp - fwp) as i16; + // will always be ordered, so +1 for each space between words + *order_score += words_in_phrase_minus_one; + // distance will always be 1, so -1 for each space between words + *distance_score -= words_in_phrase_minus_one; +} + +/// Compute the score of a match interval: +/// 1) count unique matches +/// 2) calculate distance between matches +/// 3) count ordered matches +fn get_interval_score(matches: &[Match]) -> [i16; 3] { + let mut ids: Vec = Vec::with_capacity(matches.len()); + let mut order_score = 0; + let mut distance_score = 0; + + let mut iter = matches.iter().peekable(); + while let Some(m) = iter.next() { + if let Some(next_match) = iter.peek() { + // if matches are ordered + if next_match.ids.iter().min() > m.ids.iter().min() { + order_score += 1; + } + + 
let m_last_word_pos = match m.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: [fwp, lwp], .. } => { + tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); + lwp + } + }; + let next_match_first_word_pos = next_match.get_first_word_pos(); + + // compute distance between matches + distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16; + } else if let MatchPosition::Phrase { word_positions: [fwp, lwp], .. } = m.position { + // in case last match is a phrase, count score for its words + tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); + } + + ids.extend(m.ids.iter()); + } + + ids.sort_unstable(); + ids.dedup(); + let uniq_score = ids.len() as i16; + + // rank by unique match count, then by distance between matches, then by ordered match count. + [uniq_score, distance_score, order_score] +} + +/// Returns the first and last match where the score computed by match_interval_score is the best. +pub fn find_best_match_interval(matches: &[Match], crop_size: usize) -> [&Match; 2] { + if matches.is_empty() { + panic!("`matches` should not be empty at this point"); + } + + // positions of the first and the last match of the best matches interval in `matches`. + let mut best_interval: Option = None; + + let mut save_best_interval = |interval_first, interval_last| { + let interval_score = get_interval_score(&matches[interval_first..=interval_last]); + let is_interval_score_better = &best_interval + .as_ref() + .map_or(true, |MatchIntervalWithScore { score, .. }| interval_score > *score); + + if *is_interval_score_better { + best_interval = Some(MatchIntervalWithScore { + interval: [interval_first, interval_last], + score: interval_score, + }); + } + }; + + // we compute the matches interval if we have at least 2 matches. + // current interval positions. 
+ let mut interval_first = 0; + let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); + + for (index, next_match) in matches.iter().enumerate() { + // if next match would make interval gross more than crop_size, + // we compare the current interval with the best one, + // then we increase `interval_first` until next match can be added. + let next_match_last_word_pos = next_match.get_last_word_pos(); + + // if the next match would mean that we pass the crop size window, + // we take the last valid match, that didn't pass this boundry, which is `index` - 1, + // and calculate a score for it, and check if it's better than our best so far + if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size { + // if index is 0 there is no last viable match + if index != 0 { + let interval_last = index - 1; + // keep interval if it's the best + save_best_interval(interval_first, interval_last); + } + + // advance start of the interval while interval is longer than crop_size. + loop { + interval_first += 1; + if interval_first == matches.len() { + interval_first -= 1; + break; + } + + interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); + + if interval_first_match_first_word_pos > next_match_last_word_pos + || next_match_last_word_pos - interval_first_match_first_word_pos < crop_size + { + break; + } + } + } + } + + // compute the last interval score and compare it to the best one. + let interval_last = matches.len() - 1; + // if it's the last match with itself, we need to make sure it's + // not a phrase longer than the crop window + if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size { + save_best_interval(interval_first, interval_last); + } + + // if none of the matches fit the criteria above, default to the first one + best_interval.map_or( + [&matches[0], &matches[0]], + |MatchIntervalWithScore { interval: [first, last], .. 
}| [&matches[first], &matches[last]], + ) +} diff --git a/milli/src/search/new/matches/match.rs b/milli/src/search/new/matches/match.rs new file mode 100644 index 000000000..cc08b006c --- /dev/null +++ b/milli/src/search/new/matches/match.rs @@ -0,0 +1,62 @@ +use super::matching_words::WordId; + +#[derive(Clone, Debug)] +pub enum MatchPosition { + Word { + // position of the word in the whole text. + word_position: usize, + // position of the token in the whole text. + token_position: usize, + }, + Phrase { + // position of the first and last word in the phrase in the whole text. + word_positions: [usize; 2], + // position of the first and last token in the phrase in the whole text. + token_positions: [usize; 2], + }, +} + +#[derive(Clone, Debug)] +pub struct Match { + pub match_len: usize, + // ids of the query words that matches. + pub ids: Vec, + pub position: MatchPosition, +} + +impl Match { + pub(super) fn get_first_word_pos(&self) -> usize { + match self.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: [fwp, _], .. } => fwp, + } + } + + pub(super) fn get_last_word_pos(&self) -> usize { + match self.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: [_, lwp], .. } => lwp, + } + } + + pub(super) fn get_first_token_pos(&self) -> usize { + match self.position { + MatchPosition::Word { token_position, .. } => token_position, + MatchPosition::Phrase { token_positions: [ftp, _], .. } => ftp, + } + } + + pub(super) fn get_last_token_pos(&self) -> usize { + match self.position { + MatchPosition::Word { token_position, .. } => token_position, + MatchPosition::Phrase { token_positions: [_, ltp], .. } => ltp, + } + } + + pub(super) fn get_word_count(&self) -> usize { + match self.position { + MatchPosition::Word { .. } => 1, + MatchPosition::Phrase { word_positions: [fwp, lwp], .. 
} => lwp - fwp + 1, + } + } +} diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index f8d60ef54..3df361702 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -1,12 +1,16 @@ -use std::borrow::Cow; +mod best_match_interval; +mod r#match; +mod matching_words; +mod simple_token_kind; -use charabia::{Language, SeparatorKind, Token, TokenKind, Tokenizer}; +use charabia::{Language, SeparatorKind, Token, Tokenizer}; use either::Either; pub use matching_words::MatchingWords; -use matching_words::{MatchType, PartialMatch, WordId}; +use matching_words::{MatchType, PartialMatch}; +use r#match::{Match, MatchPosition}; use serde::Serialize; - -pub mod matching_words; +use simple_token_kind::SimpleTokenKind; +use std::borrow::Cow; const DEFAULT_CROP_MARKER: &str = "…"; const DEFAULT_HIGHLIGHT_PREFIX: &str = ""; @@ -94,228 +98,12 @@ impl FormatOptions { } } -#[derive(Clone, Debug)] -pub enum MatchPosition { - Word { - // position of the word in the whole text. - word_position: usize, - // position of the token in the whole text. - token_position: usize, - }, - Phrase { - // position of the first and last word in the phrase in the whole text. - word_positions: (usize, usize), - // position of the first and last token in the phrase in the whole text. - token_positions: (usize, usize), - }, -} - -#[derive(Clone, Debug)] -pub struct Match { - match_len: usize, - // ids of the query words that matches. - ids: Vec, - position: MatchPosition, -} - -impl Match { - fn get_first_word_pos(&self) -> usize { - match self.position { - MatchPosition::Word { word_position, .. } => word_position, - MatchPosition::Phrase { word_positions: (fwp, _), .. } => fwp, - } - } - - fn get_last_word_pos(&self) -> usize { - match self.position { - MatchPosition::Word { word_position, .. } => word_position, - MatchPosition::Phrase { word_positions: (_, lwp), .. 
} => lwp, - } - } - - fn get_first_token_pos(&self) -> usize { - match self.position { - MatchPosition::Word { token_position, .. } => token_position, - MatchPosition::Phrase { token_positions: (ftp, _), .. } => ftp, - } - } - - fn get_last_token_pos(&self) -> usize { - match self.position { - MatchPosition::Word { token_position, .. } => token_position, - MatchPosition::Phrase { token_positions: (_, ltp), .. } => ltp, - } - } - - fn get_word_count(&self) -> usize { - match self.position { - MatchPosition::Word { .. } => 1, - MatchPosition::Phrase { word_positions: (fwp, lwp), .. } => lwp - fwp + 1, - } - } -} - #[derive(Serialize, Debug, Clone, PartialEq, Eq)] pub struct MatchBounds { pub start: usize, pub length: usize, } -enum SimpleTokenKind { - Separator(SeparatorKind), - NotSeparator, -} - -impl SimpleTokenKind { - fn new(token: &&Token<'_>) -> Self { - match token.kind { - TokenKind::Separator(separaor_kind) => Self::Separator(separaor_kind), - _ => Self::NotSeparator, - } - } -} - -#[derive(PartialEq, PartialOrd)] -struct MatchIntervalScore(i16, i16, i16); - -impl MatchIntervalScore { - /// Compute the score of a match interval: - /// 1) count unique matches - /// 2) calculate distance between matches - /// 3) count ordered matches - fn new(matches: &[Match]) -> Self { - let mut ids: Vec = Vec::with_capacity(matches.len()); - let mut order_score = 0; - let mut distance_score = 0; - - // count score for phrases - fn tally_phrase_scores( - fwp: &usize, - lwp: &usize, - order_score: &mut i16, - distance_score: &mut i16, - ) { - let words_in_phrase_minus_one = (lwp - fwp) as i16; - // will always be ordered, so +1 for each space between words - *order_score += words_in_phrase_minus_one; - // distance will always be 1, so -1 for each space between words - *distance_score -= words_in_phrase_minus_one; - } - - let mut iter = matches.iter().peekable(); - while let Some(m) = iter.next() { - if let Some(next_match) = iter.peek() { - // if matches are ordered - if 
next_match.ids.iter().min() > m.ids.iter().min() { - order_score += 1; - } - - let m_last_word_pos = match m.position { - MatchPosition::Word { word_position, .. } => word_position, - MatchPosition::Phrase { word_positions: (fwp, lwp), .. } => { - tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); - lwp - } - }; - let next_match_first_word_pos = next_match.get_first_word_pos(); - - // compute distance between matches - distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16; - } else if let MatchPosition::Phrase { word_positions: (fwp, lwp), .. } = m.position { - // in case last match is a phrase, count score for its words - tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); - } - - ids.extend(m.ids.iter()); - } - - ids.sort_unstable(); - ids.dedup(); - let uniq_score = ids.len() as i16; - - // rank by unique match count, then by distance between matches, then by ordered match count. - Self(uniq_score, distance_score, order_score) - } -} - -struct MatchIntervalWithScore { - interval: (usize, usize), - score: MatchIntervalScore, -} - -impl MatchIntervalWithScore { - /// Returns the matches interval where the score computed by match_interval_score is the best. - fn find_best_match_interval(matches: &[Match], crop_size: usize) -> &[Match] { - if matches.len() <= 1 { - return matches; - } - - // positions of the first and the last match of the best matches interval in `matches`. - let mut best_interval: Option = None; - - let mut save_best_interval = |interval_first, interval_last| { - let interval_score = MatchIntervalScore::new(&matches[interval_first..=interval_last]); - let is_interval_score_better = - &best_interval.as_ref().map_or(true, |Self { score, .. }| interval_score > *score); - - if *is_interval_score_better { - best_interval = - Some(Self { interval: (interval_first, interval_last), score: interval_score }); - } - }; - - // we compute the matches interval if we have at least 2 matches. 
- // current interval positions. - let mut interval_first = 0; - let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); - - for (index, next_match) in matches.iter().enumerate() { - // if next match would make interval gross more than crop_size, - // we compare the current interval with the best one, - // then we increase `interval_first` until next match can be added. - let next_match_last_word_pos = next_match.get_last_word_pos(); - - // if the next match would mean that we pass the crop size window, - // we take the last valid match, that didn't pass this boundry, which is `index` - 1, - // and calculate a score for it, and check if it's better than our best so far - if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size { - // if index is 0 there is no last viable match - if index != 0 { - let interval_last = index - 1; - // keep interval if it's the best - save_best_interval(interval_first, interval_last); - } - - // advance start of the interval while interval is longer than crop_size. - loop { - interval_first += 1; - interval_first_match_first_word_pos = - matches[interval_first].get_first_word_pos(); - - if interval_first_match_first_word_pos > next_match_last_word_pos - || next_match_last_word_pos - interval_first_match_first_word_pos - < crop_size - { - break; - } - } - } - } - - // compute the last interval score and compare it to the best one. 
- let interval_last = matches.len() - 1; - // if it's the last match with itself, we need to make sure it's - // not a phrase longer than the crop window - if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size { - save_best_interval(interval_first, interval_last); - } - - // if none of the matches fit the criteria above, default to the first one - let best_interval = best_interval.map_or((0, 0), |v| v.interval); - &matches[best_interval.0..=best_interval.1] - } -} - /// Structure used to analyze a string, compute words that match, /// and format the source string, returning a highlighted and cropped sub-string. pub struct Matcher<'t, 'tokenizer, 'b, 'lang> { @@ -355,8 +143,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { match_len: word.char_end - *first_word_char_start, ids: ids.clone().collect(), position: MatchPosition::Phrase { - word_positions: (first_word_position, word_position), - token_positions: (first_token_position, token_position), + word_positions: [first_word_position, word_position], + token_positions: [first_token_position, token_position], }, }); @@ -450,15 +238,14 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { matches: &[Match], crop_size: usize, ) -> (usize, usize) { - // if there is no match, we start from the beginning of the string by default. let ( mut remaining_words, is_iterating_forward, before_tokens_starting_index, after_tokens_starting_index, ) = if !matches.is_empty() { - let matches_first = matches.first().unwrap(); - let matches_last = matches.last().unwrap(); + let [matches_first, matches_last] = + best_match_interval::find_best_match_interval(matches, crop_size); let matches_size = matches_last.get_last_word_pos() - matches_first.get_first_word_pos() + 1; @@ -600,9 +387,6 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { // crop around the best interval. 
let (byte_start, byte_end) = match format_options.crop { Some(crop_size) if crop_size > 0 => { - let matches = MatchIntervalWithScore::find_best_match_interval( - matches, crop_size, - ); self.crop_bounds(tokens, matches, crop_size) } _ => (0, self.text.len()), @@ -625,7 +409,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { let token = &tokens[token_position]; (&token.byte_start, &token.byte_end) } - MatchPosition::Phrase { token_positions: (ftp, ltp), .. } => { + MatchPosition::Phrase { token_positions: [ftp, ltp], .. } => { (&tokens[ftp].byte_start, &tokens[ltp].byte_end) } }; diff --git a/milli/src/search/new/matches/simple_token_kind.rs b/milli/src/search/new/matches/simple_token_kind.rs new file mode 100644 index 000000000..b34a8c985 --- /dev/null +++ b/milli/src/search/new/matches/simple_token_kind.rs @@ -0,0 +1,15 @@ +use charabia::{SeparatorKind, Token, TokenKind}; + +pub enum SimpleTokenKind { + Separator(SeparatorKind), + NotSeparator, +} + +impl SimpleTokenKind { + pub fn new(token: &&Token<'_>) -> Self { + match token.kind { + TokenKind::Separator(separaor_kind) => Self::Separator(separaor_kind), + _ => Self::NotSeparator, + } + } +} From c3de3a9ab75e6be99314400137b8329cdf46ff12 Mon Sep 17 00:00:00 2001 From: "F. 
Levi" <55688616+flevi29@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:30:31 +0300 Subject: [PATCH 39/92] Refactor --- milli/src/search/new/matches/matching_words.rs | 12 +++--------- milli/src/search/new/matches/mod.rs | 1 - 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/milli/src/search/new/matches/matching_words.rs b/milli/src/search/new/matches/matching_words.rs index 4deaff6a0..e4d2785ca 100644 --- a/milli/src/search/new/matches/matching_words.rs +++ b/milli/src/search/new/matches/matching_words.rs @@ -130,7 +130,7 @@ impl<'a> Iterator for MatchesIter<'a, '_> { word.map(|word| self.matching_words.word_interner.get(word).as_str()) }) .collect(); - let partial = PartialMatch { matching_words: words, ids, char_len: 0 }; + let partial = PartialMatch { matching_words: words, ids }; partial.match_token(self.token).or_else(|| self.next()) } @@ -158,7 +158,6 @@ pub enum MatchType<'a> { pub struct PartialMatch<'a> { matching_words: Vec>, ids: &'a RangeInclusive, - char_len: usize, } impl<'a> PartialMatch<'a> { @@ -176,25 +175,20 @@ impl<'a> PartialMatch<'a> { None => token.is_stopword(), }; - let char_len = token.char_end - token.char_start; // if there are remaining words to match in the phrase and the current token is matching, // return a new Partial match allowing the highlighter to continue. if is_matching && matching_words.len() > 1 { matching_words.remove(0); - Some(MatchType::Partial(Self { matching_words, ids, char_len })) + Some(MatchType::Partial(Self { matching_words, ids })) // if there is no remaining word to match in the phrase and the current token is matching, // return a Full match. } else if is_matching { - Some(MatchType::Full { char_len, ids }) + Some(MatchType::Full { char_len: token.char_end - token.char_start, ids }) // if the current token doesn't match, return None to break the match sequence. 
} else { None } } - - pub fn char_len(&self) -> usize { - self.char_len - } } impl fmt::Debug for MatchingWords { diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 3df361702..9ca560529 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -139,7 +139,6 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { Some(MatchType::Full { ids, .. }) => { // save the token that closes the partial match as a match. matches.push(Match { - // @TODO: Shouldn't this be +1? match_len: word.char_end - *first_word_char_start, ids: ids.clone().collect(), position: MatchPosition::Phrase { From 03579aba13853560059cec3c881e284b4f7a307a Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:38:47 +0300 Subject: [PATCH 40/92] Adjust test --- milli/src/search/new/matches/mod.rs | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 9ca560529..ac0fb7e7b 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -798,12 +798,12 @@ mod tests { @"…the power to split the world between those who embraced…" ); - let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"power to\" \"and those\""); + let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\""); let mut matcher = builder.build(text, None); // should highlight "those" and the phrase "and those". 
insta::assert_snapshot!( matcher.format(format_options), - @"…groundbreaking invention had the power to split the world between…" + @"…world between those who embraced progress and those who resisted…" ); let builder = MatcherBuilder::new_test( @@ -841,17 +841,6 @@ mod tests { @"…between those who embraced progress and those who resisted change…" ); - let builder = MatcherBuilder::new_test( - &rtxn, - &temp_index, - "\"The groundbreaking invention\" \"split the world between those\"", - ); - let mut matcher = builder.build(text, None); - insta::assert_snapshot!( - matcher.format(format_options), - @"…the power to split the world between those who embraced…" - ); - let builder = MatcherBuilder::new_test( &rtxn, &temp_index, From 7f5d0837c3343b9ce154197867bd153b12390e5c Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 9 Oct 2024 11:46:57 +0200 Subject: [PATCH 41/92] fix the bad experimental search queue size --- meilisearch/src/option.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 3799bdcb7..82c783115 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -357,8 +357,8 @@ pub struct Opt { /// Lets you customize the size of the search queue. Meilisearch processes your search requests as fast as possible but once the /// queue is full it starts returning HTTP 503, Service Unavailable. /// The default value is 1000. - #[clap(long, env = MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, default_value_t = 1000)] - #[serde(default)] + #[clap(long, env = MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, default_value_t = default_experimental_search_queue_size())] + #[serde(default = "default_experimental_search_queue_size")] pub experimental_search_queue_size: usize, /// Experimental logs mode feature. 
For more information, see: @@ -890,6 +890,10 @@ fn default_dump_dir() -> PathBuf { PathBuf::from(DEFAULT_DUMP_DIR) } +fn default_experimental_search_queue_size() -> usize { + 1000 +} + /// Indicates if a snapshot was scheduled, and if yes with which interval. #[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)] pub enum ScheduleSnapshot { From 6e37ae8619ebf52aa1f9a703fc12723764f4ebe5 Mon Sep 17 00:00:00 2001 From: curquiza Date: Wed, 9 Oct 2024 19:13:14 +0200 Subject: [PATCH 42/92] Update mini-dashboard --- meilisearch/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index c193c89d4..6c2fb4060 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -157,5 +157,5 @@ german = ["meilisearch-types/german"] turkish = ["meilisearch-types/turkish"] [package.metadata.mini-dashboard] -assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" -sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe" +assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.15/build.zip" +sha1 = "d057600b4a839a2e0c0be7a372cd1b2683f3ca7e" From 466604725ec017234db3e61c58c957a3802d2bb9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 10 Oct 2024 23:47:15 +0200 Subject: [PATCH 43/92] Do not send empty edit document by function --- meilisearch/src/analytics/segment_analytics.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index f8d6a0fdc..0ea0de572 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1572,6 +1572,10 @@ impl EditDocumentsByFunctionAggregator { pub fn into_event(self, user: &User, event_name: &str) -> Option { let Self { timestamp, user_agents, index_creation, filtered, with_context } = self; + // if we had no timestamp it means we never 
encountered any events and + // thus we don't need to send this event. + let timestamp = timestamp?; + let properties = json!({ "user-agent": user_agents, "filtered": filtered, @@ -1580,7 +1584,7 @@ impl EditDocumentsByFunctionAggregator { }); Some(Track { - timestamp, + timestamp: Some(timestamp), user: user.clone(), event: event_name.to_string(), properties, From 92070a3578ded5a78bb42e8fb0ab02242fd11bc4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 10 Oct 2024 13:17:25 +0200 Subject: [PATCH 44/92] Implement the experimental drop search after and nb search per core --- .../src/analytics/segment_analytics.rs | 6 +++ meilisearch/src/main.rs | 11 +++++- meilisearch/src/option.rs | 38 ++++++++++++++++++- 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 0ea0de572..476b3264e 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -265,6 +265,8 @@ struct Infos { experimental_contains_filter: bool, experimental_enable_metrics: bool, experimental_search_queue_size: usize, + experimental_drop_search_after: usize, + experimental_nb_searches_per_core: usize, experimental_logs_mode: LogMode, experimental_replication_parameters: bool, experimental_enable_logs_route: bool, @@ -308,6 +310,8 @@ impl From for Infos { experimental_contains_filter, experimental_enable_metrics, experimental_search_queue_size, + experimental_drop_search_after, + experimental_nb_searches_per_core, experimental_logs_mode, experimental_replication_parameters, experimental_enable_logs_route, @@ -359,6 +363,8 @@ impl From for Infos { experimental_contains_filter, experimental_enable_metrics, experimental_search_queue_size, + experimental_drop_search_after: experimental_drop_search_after.into(), + experimental_nb_searches_per_core: experimental_nb_searches_per_core.into(), experimental_logs_mode, experimental_replication_parameters, 
experimental_enable_logs_route, diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index b66bfc5b8..de9784d15 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -5,6 +5,7 @@ use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; use std::thread::available_parallelism; +use std::time::Duration; use actix_web::http::KeepAlive; use actix_web::web::Data; @@ -153,8 +154,14 @@ async fn run_http( let auth_controller = Data::from(auth_controller); let search_queue = SearchQueue::new( opt.experimental_search_queue_size, - available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()), - ); + available_parallelism() + .unwrap_or(NonZeroUsize::new(2).unwrap()) + .checked_mul(opt.experimental_nb_searches_per_core) + .unwrap_or(NonZeroUsize::MAX), + ) + .with_time_to_abort(Duration::from_secs( + usize::from(opt.experimental_drop_search_after) as u64 + )); let search_queue = Data::new(search_queue); let http_server = HttpServer::new(move || { diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 82c783115..bbeb94577 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -2,7 +2,7 @@ use std::env::VarError; use std::ffi::OsStr; use std::fmt::Display; use std::io::{BufReader, Read}; -use std::num::ParseIntError; +use std::num::{NonZeroUsize, ParseIntError}; use std::ops::Deref; use std::path::PathBuf; use std::str::FromStr; @@ -55,6 +55,8 @@ const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LO const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE"; +const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER"; +const MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE: &str = "MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE"; const 
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str = @@ -361,6 +363,22 @@ pub struct Opt { #[serde(default = "default_experimental_search_queue_size")] pub experimental_search_queue_size: usize, + /// Experimental drop search after. For more information, see: + /// + /// Lets you customize after how much seconds should Meilisearch consider a search as irrelevant and drop it. + /// The default value is 60. + #[clap(long, env = MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER, default_value_t = default_drop_search_after())] + #[serde(default = "default_drop_search_after")] + pub experimental_drop_search_after: NonZeroUsize, + + /// Experimental number of searches per core. For more information, see: + /// + /// Lets you customize after how many search requests can run on each cores. + /// The default value is 4. + #[clap(long, env = MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE, default_value_t = default_nb_searches_per_core())] + #[serde(default = "default_drop_search_after")] + pub experimental_nb_searches_per_core: NonZeroUsize, + /// Experimental logs mode feature. For more information, see: /// /// Change the mode of the logs on the console. 
@@ -492,6 +510,8 @@ impl Opt { experimental_contains_filter, experimental_enable_metrics, experimental_search_queue_size, + experimental_drop_search_after, + experimental_nb_searches_per_core, experimental_logs_mode, experimental_enable_logs_route, experimental_replication_parameters, @@ -559,6 +579,14 @@ impl Opt { MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, experimental_search_queue_size.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER, + experimental_drop_search_after.to_string(), + ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE, + experimental_nb_searches_per_core.to_string(), + ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_LOGS_MODE, experimental_logs_mode.to_string(), @@ -894,6 +922,14 @@ fn default_experimental_search_queue_size() -> usize { 1000 } +fn default_drop_search_after() -> NonZeroUsize { + NonZeroUsize::new(60).unwrap() +} + +fn default_nb_searches_per_core() -> NonZeroUsize { + NonZeroUsize::new(4).unwrap() +} + /// Indicates if a snapshot was scheduled, and if yes with which interval. #[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)] pub enum ScheduleSnapshot { From c32282acb1f14e65bb124003c34fa1de9c01f869 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 10 Oct 2024 13:21:18 +0200 Subject: [PATCH 45/92] improve doc --- meilisearch/src/option.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index bbeb94577..a231eb058 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -365,7 +365,7 @@ pub struct Opt { /// Experimental drop search after. For more information, see: /// - /// Lets you customize after how much seconds should Meilisearch consider a search as irrelevant and drop it. + /// Let you customize after how many seconds Meilisearch should consider a search as irrelevant and drop it. /// The default value is 60. 
#[clap(long, env = MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER, default_value_t = default_drop_search_after())] #[serde(default = "default_drop_search_after")] @@ -373,7 +373,7 @@ pub struct Opt { /// Experimental number of searches per core. For more information, see: /// - /// Lets you customize after how many search requests can run on each cores. + /// Lets you customize how many search requests can run on each core. /// The default value is 4. #[clap(long, env = MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE, default_value_t = default_nb_searches_per_core())] #[serde(default = "default_drop_search_after")] From c4efd1df4e70b2929ee1cb1c22b535b7ff163cc7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 10 Oct 2024 13:40:21 +0200 Subject: [PATCH 46/92] Update meilisearch/src/option.rs Co-authored-by: Louis Dureuil --- meilisearch/src/option.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index a231eb058..cef787e1a 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -376,7 +376,7 @@ pub struct Opt { /// Lets you customize how many search requests can run on each core. /// The default value is 4. #[clap(long, env = MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE, default_value_t = default_nb_searches_per_core())] - #[serde(default = "default_drop_search_after")] + #[serde(default = "default_nb_searches_per_core")] pub experimental_nb_searches_per_core: NonZeroUsize, /// Experimental logs mode feature. 
For more information, see: From 3085092e04cbc909601b8b290d883b35ff541f89 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 10 Oct 2024 13:40:28 +0200 Subject: [PATCH 47/92] Update meilisearch/src/option.rs Co-authored-by: Louis Dureuil --- meilisearch/src/option.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index cef787e1a..b3f01d208 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -373,7 +373,7 @@ pub struct Opt { /// Experimental number of searches per core. For more information, see: /// - /// Lets you customize how many search requests can run on each core. + /// Lets you customize how many search requests can run on each core concurrently. /// The default value is 4. #[clap(long, env = MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE, default_value_t = default_nb_searches_per_core())] #[serde(default = "default_nb_searches_per_core")] From 4b4a6c78638573721d7b88869fd443236f90d29a Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 10 Oct 2024 15:24:24 +0200 Subject: [PATCH 48/92] Update meilisearch/src/option.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- meilisearch/src/option.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index b3f01d208..02dc660a4 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -365,7 +365,7 @@ pub struct Opt { /// Experimental drop search after. For more information, see: /// - /// Let you customize after how many seconds Meilisearch should consider a search as irrelevant and drop it. + /// Let you customize after how many seconds Meilisearch should consider a search request irrelevant and drop it. /// The default value is 60. 
#[clap(long, env = MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER, default_value_t = default_drop_search_after())] #[serde(default = "default_drop_search_after")] From e44e7b5e81e8644ae1c95d3a3b28f530fcc52eb2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 14 Oct 2024 16:17:19 +0200 Subject: [PATCH 49/92] Fix retrieveVectors when explicitly passed in displayed attributes without any document containing _vectors --- meilisearch/src/search/mod.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 66b6e56de..7832c1761 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -1195,8 +1195,13 @@ impl<'a> HitMaker<'a> { let vectors_is_hidden = match (&displayed_ids, vectors_fid) { // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid (None, _) => false, - // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field - (Some(_), None) => true, + // vectors has no fid, so check its explicit name + (Some(_), None) => { + // unwrap as otherwise we'd go to the first one + let displayed_names = index.displayed_fields(rtxn)?.unwrap(); + !displayed_names + .contains(&milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME) + } // displayed_ids is a finit list, so hide if `_vectors` is not part of it (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid), }; From 5a74d4729cdc02a3cea011d4ab6a0f608be867f9 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 14 Oct 2024 16:23:28 +0200 Subject: [PATCH 50/92] Add test failing before this PR, OK now --- meilisearch/tests/search/hybrid.rs | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index e301c0b05..00a65d9aa 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -568,6 +568,57 @@ async fn 
retrieve_vectors() { ] "###); + // use explicit `_vectors` in displayed attributes + let (response, code) = index + .update_settings(json!({ "displayedAttributes": ["id", "title", "desc", "_vectors"]} )) + .await; + assert_eq!(202, code, "{:?}", response); + index.wait_task(response.uid()).await; + + let (response, code) = index + .search_post( + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), + ) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###" + [ + { + "title": "Captain Planet", + "desc": "He's not part of the Marvel Cinematic Universe", + "id": "2", + "_vectors": { + "default": { + "embeddings": "[vectors]", + "regenerate": true + } + } + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "3", + "_vectors": { + "default": { + "embeddings": "[vectors]", + "regenerate": true + } + } + }, + { + "title": "Shazam!", + "desc": "a Captain Marvel ersatz", + "id": "1", + "_vectors": { + "default": { + "embeddings": "[vectors]", + "regenerate": true + } + } + } + ] + "###); + // remove `_vectors` from displayed attributes let (response, code) = index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await; From 73e87c152a4bd35fd4309141615676210c6b279c Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 16 Oct 2024 15:43:27 +0200 Subject: [PATCH 51/92] rewrite most of the analytics especially the settings --- meilisearch/src/analytics/mock_analytics.rs | 109 -- meilisearch/src/analytics/mod.rs | 179 ++-- .../src/analytics/segment_analytics.rs | 211 ++-- meilisearch/src/lib.rs | 4 +- meilisearch/src/routes/dump.rs | 7 +- meilisearch/src/routes/features.rs | 58 +- meilisearch/src/routes/indexes/documents.rs | 318 +++++- .../src/routes/indexes/facet_search.rs | 112 +- meilisearch/src/routes/indexes/mod.rs | 53 +- meilisearch/src/routes/indexes/search.rs | 13 +- 
meilisearch/src/routes/indexes/settings.rs | 962 +++++++++++++----- meilisearch/src/routes/swap_indexes.rs | 2 +- 12 files changed, 1381 insertions(+), 647 deletions(-) delete mode 100644 meilisearch/src/analytics/mock_analytics.rs diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs deleted file mode 100644 index 54b8d4f1b..000000000 --- a/meilisearch/src/analytics/mock_analytics.rs +++ /dev/null @@ -1,109 +0,0 @@ -use std::any::Any; -use std::sync::Arc; - -use actix_web::HttpRequest; -use meilisearch_types::InstanceUid; -use serde_json::Value; - -use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind}; -use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; -use crate::Opt; - -pub struct MockAnalytics { - instance_uid: Option, -} - -#[derive(Default)] -pub struct SearchAggregator; - -#[allow(dead_code)] -impl SearchAggregator { - pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self, _: &dyn Any) {} -} - -#[derive(Default)] -pub struct SimilarAggregator; - -#[allow(dead_code)] -impl SimilarAggregator { - pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self, _: &dyn Any) {} -} - -#[derive(Default)] -pub struct MultiSearchAggregator; - -#[allow(dead_code)] -impl MultiSearchAggregator { - pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self) {} -} - -#[derive(Default)] -pub struct FacetSearchAggregator; - -#[allow(dead_code)] -impl FacetSearchAggregator { - pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self, _: &dyn Any) {} -} - -impl MockAnalytics { - #[allow(clippy::new_ret_no_self)] - pub fn new(opt: &Opt) -> Arc { - let instance_uid = find_user_id(&opt.db_path); - Arc::new(Self { instance_uid }) - } -} - -impl Analytics for MockAnalytics { - fn instance_uid(&self) -> 
Option<&meilisearch_types::InstanceUid> { - self.instance_uid.as_ref() - } - - // These methods are noop and should be optimized out - fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} - fn get_search(&self, _aggregate: super::SearchAggregator) {} - fn post_search(&self, _aggregate: super::SearchAggregator) {} - fn get_similar(&self, _aggregate: super::SimilarAggregator) {} - fn post_similar(&self, _aggregate: super::SimilarAggregator) {} - fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {} - fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {} - fn add_documents( - &self, - _documents_query: &UpdateDocumentsQuery, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {} - fn update_documents( - &self, - _documents_query: &UpdateDocumentsQuery, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn update_documents_by_function( - &self, - _documents_query: &DocumentEditionByFunction, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} - fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} -} diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 3c7ca0ed3..a8658d830 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -1,45 +1,51 @@ -mod mock_analytics; -#[cfg(feature = "analytics")] -mod segment_analytics; +pub mod segment_analytics; +use std::any::TypeId; +use std::collections::HashMap; use std::fs; use std::path::{Path, PathBuf}; use std::str::FromStr; use actix_web::HttpRequest; use meilisearch_types::InstanceUid; -pub use mock_analytics::MockAnalytics; use once_cell::sync::Lazy; use platform_dirs::AppDirs; -use serde_json::Value; - -use 
crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; - -// if the analytics feature is disabled -// the `SegmentAnalytics` point to the mock instead of the real analytics -#[cfg(not(feature = "analytics"))] -pub type SegmentAnalytics = mock_analytics::MockAnalytics; -#[cfg(not(feature = "analytics"))] -pub type SearchAggregator = mock_analytics::SearchAggregator; -#[cfg(not(feature = "analytics"))] -pub type SimilarAggregator = mock_analytics::SimilarAggregator; -#[cfg(not(feature = "analytics"))] -pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator; -#[cfg(not(feature = "analytics"))] -pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator; +use segment::message::User; +use serde::Serialize; // if the feature analytics is enabled we use the real analytics -#[cfg(feature = "analytics")] pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; -#[cfg(feature = "analytics")] -pub type SearchAggregator = segment_analytics::SearchAggregator; -#[cfg(feature = "analytics")] +pub use segment_analytics::SearchAggregator; pub type SimilarAggregator = segment_analytics::SimilarAggregator; -#[cfg(feature = "analytics")] pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; -#[cfg(feature = "analytics")] pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; +/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name. +#[macro_export] +macro_rules! 
empty_analytics { + ($struct_name:ident, $event_name:literal) => { + #[derive(Default)] + struct $struct_name {} + + impl $crate::analytics::Aggregate for $struct_name { + fn event_name(&self) -> &'static str { + $event_name + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + self + } + + fn into_event(self) -> serde_json::Value { + serde_json::json!({}) + } + } + }; +} + /// The Meilisearch config dir: /// `~/.config/Meilisearch` on *NIX or *BSD. /// `~/Library/ApplicationSupport` on macOS. @@ -78,60 +84,73 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } -pub trait Analytics: Sync + Send { - fn instance_uid(&self) -> Option<&InstanceUid>; +pub trait Aggregate { + fn event_name(&self) -> &'static str; + + fn aggregate(self, other: Self) -> Self + where + Self: Sized; + + fn into_event(self) -> impl Serialize + where + Self: Sized; +} + +/// Helper trait to define multiple aggregate with the same content but a different name. +/// Commonly used when you must aggregate a search with POST or with GET for example. +pub trait AggregateMethod { + fn event_name() -> &'static str; +} + +/// A macro used to quickly define multiple aggregate method with their name +#[macro_export] +macro_rules! 
aggregate_methods { + ($method:ident => $event_name:literal) => { + pub enum $method {} + + impl $crate::analytics::AggregateMethod for $method { + fn event_name() -> &'static str { + $event_name + } + } + }; + ($($method:ident => $event_name:literal,)+) => { + $( + aggregate_methods!($method => $event_name); + )+ + + }; +} + +pub struct Analytics { + // TODO: TAMO: remove + inner: Option, + + instance_uid: Option, + user: Option, + events: HashMap>, +} + +impl Analytics { + fn no_analytics() -> Self { + Self { inner: None, events: HashMap::new(), instance_uid: None, user: None } + } + + fn segment_analytics(segment: SegmentAnalytics) -> Self { + Self { + instance_uid: Some(segment.instance_uid), + user: Some(segment.user), + inner: Some(segment), + events: HashMap::new(), + } + } + + pub fn instance_uid(&self) -> Option<&InstanceUid> { + self.instance_uid + } /// The method used to publish most analytics that do not need to be batched every hours - fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>); - - /// This method should be called to aggregate a get search - fn get_search(&self, aggregate: SearchAggregator); - - /// This method should be called to aggregate a post search - fn post_search(&self, aggregate: SearchAggregator); - - /// This method should be called to aggregate a get similar request - fn get_similar(&self, aggregate: SimilarAggregator); - - /// This method should be called to aggregate a post similar request - fn post_similar(&self, aggregate: SimilarAggregator); - - /// This method should be called to aggregate a post array of searches - fn post_multi_search(&self, aggregate: MultiSearchAggregator); - - /// This method should be called to aggregate post facet values searches - fn post_facet_search(&self, aggregate: FacetSearchAggregator); - - // this method should be called to aggregate an add documents request - fn add_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: 
&HttpRequest, - ); - - // this method should be called to aggregate a fetch documents request - fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); - - // this method should be called to aggregate a fetch documents request - fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); - - // this method should be called to aggregate a add documents request - fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest); - - // this method should be called to batch an update documents request - fn update_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ); - - // this method should be called to batch an update documents by function request - fn update_documents_by_function( - &self, - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ); + pub fn publish(&self, send: impl Aggregate, request: Option<&HttpRequest>) { + let Some(segment) = self.inner else { return }; + } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 476b3264e..8a6dfd780 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -25,7 +25,8 @@ use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; use super::{ - config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, + config_user_id_path, Aggregate, AggregateMethod, DocumentDeletionKind, DocumentFetchKind, + MEILISEARCH_CONFIG_PATH, }; use crate::analytics::Analytics; use crate::option::{ @@ -40,7 +41,7 @@ use crate::search::{ DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO, }; -use crate::Opt; +use crate::{aggregate_methods, Opt}; const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; @@ -87,9 +88,9 @@ pub enum AnalyticsMsg { } pub struct 
SegmentAnalytics { - instance_uid: InstanceUid, + pub instance_uid: InstanceUid, sender: Sender, - user: User, + pub user: User, } impl SegmentAnalytics { @@ -98,7 +99,7 @@ impl SegmentAnalytics { opt: &Opt, index_scheduler: Arc, auth_controller: Arc, - ) -> Arc { + ) -> Arc { let instance_uid = super::find_user_id(&opt.db_path); let first_time_run = instance_uid.is_none(); let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4); @@ -108,7 +109,7 @@ impl SegmentAnalytics { // if reqwest throws an error we won't be able to send analytics if client.is_err() { - return super::MockAnalytics::new(opt); + return Arc::new(Analytics::no_analytics()); } let client = @@ -161,10 +162,11 @@ impl SegmentAnalytics { let this = Self { instance_uid, sender, user: user.clone() }; - Arc::new(this) + Arc::new(Analytics::segment_analytics(this)) } } +/* impl super::Analytics for SegmentAnalytics { fn instance_uid(&self) -> Option<&InstanceUid> { Some(&self.instance_uid) @@ -253,6 +255,7 @@ impl super::Analytics for SegmentAnalytics { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate)); } } +*/ /// This structure represent the `infos` field we send in the analytics. 
/// It's quite close to the `Opt` structure except all sensitive informations @@ -607,12 +610,7 @@ impl Segment { } #[derive(Default)] -pub struct SearchAggregator { - timestamp: Option, - - // context - user_agents: HashSet, - +pub struct SearchAggregator { // requests total_received: usize, total_succeeded: usize, @@ -684,9 +682,11 @@ pub struct SearchAggregator { show_ranking_score: bool, show_ranking_score_details: bool, ranking_score_threshold: bool, + + marker: std::marker::PhantomData, } -impl SearchAggregator { +impl SearchAggregator { #[allow(clippy::field_reassign_with_default)] pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self { let SearchQuery { @@ -827,12 +827,21 @@ impl SearchAggregator { } self.time_spent.push(*processing_time_ms as usize); } +} - /// Aggregate one [SearchAggregator] into another. - pub fn aggregate(&mut self, mut other: Self) { +aggregate_methods!( + SearchGET => "Documents Searched GET", + SearchPOST => "Documents Searched POST", + +); + +impl Aggregate for SearchAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(mut self, mut other: Self) -> Self { let Self { - timestamp, - user_agents, total_received, total_succeeded, ref mut time_spent, @@ -871,17 +880,9 @@ impl SearchAggregator { total_used_negative_operator, ranking_score_threshold, ref mut locales, + marker: _, } = other; - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - // request self.total_received = self.total_received.saturating_add(total_received); self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); @@ -961,12 +962,12 @@ impl SearchAggregator { // locales self.locales.append(locales); + + self } - pub fn into_event(self, user: &User, event_name: &str) -> Option { + fn into_event(self) -> Option { let Self { - timestamp, - user_agents, total_received, 
total_succeeded, time_spent, @@ -1005,90 +1006,78 @@ impl SearchAggregator { total_used_negative_operator, ranking_score_threshold, locales, + marker: _, } = self; - if total_received == 0 { - None - } else { - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentage of value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - "total_degraded": total_degraded, - "total_used_negative_operator": total_used_negative_operator, - }, - "sort": { - "with_geoPoint": sort_with_geo_point, - "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), - }, - "distinct": distinct, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "attributes_to_search_on": { - "total_number_of_uses": attributes_to_search_on_total_number_of_uses, - }, - "q": { - "max_terms_number": max_terms_number, - }, - 
"vector": { - "max_vector_size": max_vector_size, - "retrieve_vectors": retrieve_vectors, - }, - "hybrid": { - "enabled": hybrid, - "semantic_ratio": semantic_ratio, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, - }, - "formatting": { - "max_attributes_to_retrieve": max_attributes_to_retrieve, - "max_attributes_to_highlight": max_attributes_to_highlight, - "highlight_pre_tag": highlight_pre_tag, - "highlight_post_tag": highlight_post_tag, - "max_attributes_to_crop": max_attributes_to_crop, - "crop_marker": crop_marker, - "show_matches_position": show_matches_position, - "crop_length": crop_length, - }, - "facets": { - "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), - }, - "matching_strategy": { - "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "locales": locales, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + "total_degraded": total_degraded, + "total_used_negative_operator": total_used_negative_operator, + }, + "sort": { + "with_geoPoint": sort_with_geo_point, + "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), + }, + "distinct": distinct, + "filter": { + "with_geoRadius": filter_with_geo_radius, 
+ "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "attributes_to_search_on": { + "total_number_of_uses": attributes_to_search_on_total_number_of_uses, + }, + "q": { + "max_terms_number": max_terms_number, + }, + "vector": { + "max_vector_size": max_vector_size, + "retrieve_vectors": retrieve_vectors, + }, + "hybrid": { + "enabled": hybrid, + "semantic_ratio": semantic_ratio, + }, + "pagination": { + "max_limit": max_limit, + "max_offset": max_offset, + "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, + }, + "formatting": { + "max_attributes_to_retrieve": max_attributes_to_retrieve, + "max_attributes_to_highlight": max_attributes_to_highlight, + "highlight_pre_tag": highlight_pre_tag, + "highlight_post_tag": highlight_post_tag, + "max_attributes_to_crop": max_attributes_to_crop, + "crop_marker": crop_marker, + "show_matches_position": show_matches_position, + "crop_length": crop_length, + }, + "facets": { + "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), + }, + "matching_strategy": { + "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "locales": locales, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + "ranking_score_threshold": ranking_score_threshold, + }, + }) } } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index b24f18fae..80177876a 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -120,7 +120,7 @@ pub fn create_app( search_queue: Data, opt: Opt, logs: (LogRouteHandle, LogStderrHandle), - analytics: Arc, + 
analytics: Arc, enable_dashboard: bool, ) -> actix_web::App< impl ServiceFactory< @@ -473,7 +473,7 @@ pub fn configure_data( search_queue: Data, opt: &Opt, (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Arc, ) { let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; config diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 7f3cd06a5..0fdeef5ed 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -4,7 +4,6 @@ use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; -use serde_json::json; use tracing::debug; use crate::analytics::Analytics; @@ -18,14 +17,16 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); } +crate::empty_analytics!(DumpAnalytics, "Dump Created"); + pub async fn create_dump( index_scheduler: GuardedData, Data>, auth_controller: GuardedData, Data>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { - analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); + analytics.publish(DumpAnalytics::default(), Some(&req)); let task = KindWithContent::DumpCreation { keys: auth_controller.list_keys()?, diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index bc656bdbb..24c89938d 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -6,10 +6,11 @@ use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; +use serde::Serialize; use serde_json::json; use tracing::debug; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::ActionPolicy; 
use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; @@ -22,17 +23,19 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } +crate::empty_analytics!(GetExperimentalFeatureAnalytics, "Experimental features Seen"); + async fn get_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>, Data, >, req: HttpRequest, - analytics: Data, + analytics: Data, ) -> HttpResponse { let features = index_scheduler.features(); - analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req)); + analytics.publish(GetExperimentalFeatureAnalytics::default(), Some(&req)); let features = features.runtime_features(); debug!(returns = ?features, "Get features"); HttpResponse::Ok().json(features) @@ -53,6 +56,38 @@ pub struct RuntimeTogglableFeatures { pub contains_filter: Option, } +#[derive(Serialize)] +pub struct PatchExperimentalFeatureAnalytics { + vector_store: bool, + metrics: bool, + logs_route: bool, + edit_documents_by_function: bool, + contains_filter: bool, +} + +impl Aggregate for PatchExperimentalFeatureAnalytics { + fn event_name(&self) -> &'static str { + "Experimental features Updated" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + vector_store: other.vector_store, + metrics: other.metrics, + logs_route: other.logs_route, + edit_documents_by_function: other.edit_documents_by_function, + contains_filter: other.contains_filter, + } + } + + fn into_event(self) -> serde_json::Value { + serde_json::to_value(self).unwrap() + } +} + async fn patch_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>, @@ -60,7 +95,7 @@ async fn patch_features( >, new_features: AwebJson, req: HttpRequest, - analytics: Data, + analytics: Data, ) -> Result { let features = index_scheduler.features(); debug!(parameters = ?new_features, "Patch features"); @@ -89,14 +124,13 @@ async fn patch_features( } = 
new_features; analytics.publish( - "Experimental features Updated".to_string(), - json!({ - "vector_store": vector_store, - "metrics": metrics, - "logs_route": logs_route, - "edit_documents_by_function": edit_documents_by_function, - "contains_filter": contains_filter, - }), + PatchExperimentalFeatureAnalytics { + vector_store, + metrics, + logs_route, + edit_documents_by_function, + contains_filter, + }, Some(&req), ); index_scheduler.put_runtime_features(new_features)?; diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 85cf33c54..8f4cd026d 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -1,4 +1,6 @@ +use std::collections::HashSet; use std::io::ErrorKind; +use std::marker::PhantomData; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; @@ -23,14 +25,14 @@ use meilisearch_types::tasks::KindWithContent; use meilisearch_types::{milli, Document, Index}; use mime::Mime; use once_cell::sync::Lazy; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; use tracing::debug; -use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; +use crate::analytics::{Aggregate, AggregateMethod, Analytics, DocumentDeletionKind}; use crate::error::MeilisearchHttpError; use crate::error::PayloadError::ReceivePayload; use crate::extractors::authentication::policies::*; @@ -41,7 +43,7 @@ use crate::routes::{ get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, }; use crate::search::{parse_filter, RetrieveVectors}; -use crate::Opt; +use crate::{aggregate_methods, Opt}; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()] @@ -100,12 +102,82 @@ pub struct GetDocument { 
retrieve_vectors: Param, } +#[derive(Default, Serialize)] +pub struct DocumentsFetchAggregator { + #[serde(rename = "requests.total_received")] + total_received: usize, + + // a call on ../documents/:doc_id + per_document_id: bool, + // if a filter was used + per_filter: bool, + + #[serde(rename = "vector.retrieve_vectors")] + retrieve_vectors: bool, + + // pagination + #[serde(rename = "pagination.max_limit")] + max_limit: usize, + #[serde(rename = "pagination.max_offset")] + max_offset: usize, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum DocumentFetchKind { + PerDocumentId { retrieve_vectors: bool }, + Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, +} + +impl DocumentsFetchAggregator { + pub fn from_query(query: &DocumentFetchKind) -> Self { + let (limit, offset, retrieve_vectors) = match query { + DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), + DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => { + (*limit, *offset, *retrieve_vectors) + } + }; + Self { + total_received: 1, + per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), + per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. 
} if *with_filter), + max_limit: limit, + max_offset: offset, + retrieve_vectors, + } + } +} + +impl Aggregate for DocumentsFetchAggregator { + // TODO: TAMO: Should we do the same event for the GET requests + fn event_name(&self) -> &'static str { + "Documents Fetched POST" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + total_received: self.total_received.saturating_add(other.total_received), + per_document_id: self.per_document_id | other.per_document_id, + per_filter: self.per_filter | other.per_filter, + retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors, + max_limit: self.max_limit.max(other.max_limit), + max_offset: self.max_offset.max(other.max_offset), + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn get_document( index_scheduler: GuardedData, Data>, document_param: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = document_param.into_inner(); debug!(parameters = ?params, "Get document"); @@ -117,9 +189,12 @@ pub async fn get_document( let features = index_scheduler.features(); let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?; - analytics.get_fetch_documents( - &DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 }, - &req, + analytics.publish( + DocumentsFetchAggregator { + retrieve_vectors: param_retrieve_vectors.0, + ..Default::default() + }, + Some(&req), ); let index = index_scheduler.index(&index_uid)?; @@ -129,17 +204,57 @@ pub async fn get_document( Ok(HttpResponse::Ok().json(document)) } +#[derive(Default, Serialize)] +pub struct DocumentsDeletionAggregator { + #[serde(rename = "requests.total_received")] + total_received: usize, + per_document_id: bool, + clear_all: bool, + per_batch: bool, + per_filter: bool, +} + +impl Aggregate for DocumentsDeletionAggregator { + fn 
event_name(&self) -> &'static str { + "Documents Deleted" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + total_received: self.total_received.saturating_add(other.total_received), + per_document_id: self.per_document_id | other.per_document_id, + clear_all: self.clear_all | other.clear_all, + per_batch: self.per_batch | other.per_batch, + per_filter: self.per_filter | other.per_filter, + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn delete_document( index_scheduler: GuardedData, Data>, path: web::Path, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = path.into_inner(); let index_uid = IndexUid::try_from(index_uid)?; - analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req); + analytics.publish( + DocumentsDeletionAggregator { + total_received: 1, + per_document_id: true, + ..Default::default() + }, + Some(&req), + ); let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), @@ -190,19 +305,21 @@ pub async fn documents_by_query_post( index_uid: web::Path, body: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let body = body.into_inner(); debug!(parameters = ?body, "Get documents POST"); - analytics.post_fetch_documents( - &DocumentFetchKind::Normal { - with_filter: body.filter.is_some(), - limit: body.limit, - offset: body.offset, + analytics.publish( + DocumentsFetchAggregator { + total_received: 1, + per_filter: body.filter.is_some(), retrieve_vectors: body.retrieve_vectors, + max_limit: body.limit, + max_offset: body.offset, + ..Default::default() }, - &req, + Some(&req), ); documents_by_query(&index_scheduler, index_uid, body) @@ -213,7 +330,7 @@ pub async fn get_documents( index_uid: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { 
debug!(parameters = ?params, "Get documents GET"); @@ -235,14 +352,16 @@ pub async fn get_documents( filter, }; - analytics.get_fetch_documents( - &DocumentFetchKind::Normal { - with_filter: query.filter.is_some(), - limit: query.limit, - offset: query.offset, + analytics.publish( + DocumentsFetchAggregator { + total_received: 1, + per_filter: query.filter.is_some(), retrieve_vectors: query.retrieve_vectors, + max_limit: query.limit, + max_offset: query.offset, + ..Default::default() }, - &req, + Some(&req), ); documents_by_query(&index_scheduler, index_uid, query) @@ -298,6 +417,42 @@ fn from_char_csv_delimiter( } } +aggregate_methods!( + Replaced => "Documents Added", + Updated => "Documents Updated", +); + +#[derive(Default, Serialize)] +pub struct DocumentsAggregator { + payload_types: HashSet, + primary_key: HashSet, + index_creation: bool, + #[serde(skip)] + method: PhantomData, +} + +impl Aggregate for DocumentsAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(mut self, other: Self) -> Self + where + Self: Sized, + { + Self { + payload_types: self.payload_types.union(&other.payload_types).collect(), + primary_key: self.primary_key.union(&other.primary_key).collect(), + index_creation: self.index_creation | other.index_creation, + method: PhantomData, + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn replace_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -305,17 +460,33 @@ pub async fn replace_documents( body: Payload, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; debug!(parameters = ?params, "Replace documents"); let params = params.into_inner(); - analytics.add_documents( - ¶ms, - index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), - &req, + let mut content_types = HashSet::new(); + let content_type = req + 
.headers() + .get(CONTENT_TYPE) + .and_then(|s| s.to_str().ok()) + .unwrap_or("unknown") + .to_string(); + content_types.insert(content_type); + let mut primary_keys = HashSet::new(); + if let Some(primary_key) = params.primary_key.clone() { + primary_keys.insert(primary_key); + } + analytics.publish( + DocumentsAggregator:: { + payload_types: content_types, + primary_key: primary_keys, + index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), + method: PhantomData, + }, + Some(&req), ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); @@ -346,17 +517,33 @@ pub async fn update_documents( body: Payload, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let params = params.into_inner(); debug!(parameters = ?params, "Update documents"); - analytics.add_documents( - ¶ms, - index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), - &req, + let mut content_types = HashSet::new(); + let content_type = req + .headers() + .get(CONTENT_TYPE) + .and_then(|s| s.to_str().ok()) + .unwrap_or("unknown") + .to_string(); + content_types.insert(content_type); + let mut primary_keys = HashSet::new(); + if let Some(primary_key) = params.primary_key.clone() { + primary_keys.insert(primary_key); + } + analytics.publish( + DocumentsAggregator:: { + payload_types: content_types, + primary_key: primary_keys, + index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), + method: PhantomData, + }, + Some(&req), ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); @@ -524,12 +711,15 @@ pub async fn delete_documents_batch( body: web::Json>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by batch"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; - 
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); + analytics.publish( + DocumentsDeletionAggregator { total_received: 1, per_batch: true, ..Default::default() }, + Some(&req), + ); let ids = body .iter() @@ -562,14 +752,17 @@ pub async fn delete_documents_by_filter( body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by filter"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); let filter = body.into_inner().filter; - analytics.delete_documents(DocumentDeletionKind::PerFilter, &req); + analytics.publish( + DocumentsDeletionAggregator { total_received: 1, per_filter: true, ..Default::default() }, + Some(&req), + ); // we ensure the filter is well formed before enqueuing it crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())? @@ -599,13 +792,44 @@ pub struct DocumentEditionByFunction { pub function: String, } +#[derive(Default, Serialize)] +struct EditDocumentsByFunctionAggregator { + // Set to true if at least one request was filtered + filtered: bool, + // Set to true if at least one request contained a context + with_context: bool, + + index_creation: bool, +} + +impl Aggregate for EditDocumentsByFunctionAggregator { + fn event_name(&self) -> &'static str { + "Documents Edited By Function" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + filtered: self.filtered | other.filtered, + with_context: self.with_context | other.with_context, + index_creation: self.index_creation | other.index_creation, + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn edit_documents_by_function( index_scheduler: GuardedData, Data>, index_uid: web::Path, params: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = 
?params, "Edit documents by function"); @@ -617,10 +841,13 @@ pub async fn edit_documents_by_function( let index_uid = index_uid.into_inner(); let params = params.into_inner(); - analytics.update_documents_by_function( - ¶ms, - index_scheduler.index(&index_uid).is_err(), - &req, + analytics.publish( + EditDocumentsByFunctionAggregator { + filtered: params.filter.is_some(), + with_context: params.context.is_some(), + index_creation: index_scheduler.index(&index_uid).is_err(), + }, + Some(&req), ); let DocumentEditionByFunction { filter, context, function } = params; @@ -670,10 +897,13 @@ pub async fn clear_all_documents( index_uid: web::Path, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); + analytics.publish( + DocumentsDeletionAggregator { total_received: 1, clear_all: true, ..Default::default() }, + Some(&req), + ); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; let uid = get_task_id(&req, &opt)?; diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 1df80711d..1e9d0e15e 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -1,3 +1,5 @@ +use std::collections::{BinaryHeap, HashSet}; + use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -10,14 +12,15 @@ use meilisearch_types::locales::Locale; use serde_json::Value; use tracing::debug; -use crate::analytics::{Analytics, FacetSearchAggregator}; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::indexes::search::search_kind; use crate::search::{ - add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, 
RankingScoreThreshold, - SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, + add_search_rules, perform_facet_search, FacetSearchResult, HybridQuery, MatchingStrategy, + RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEARCH_OFFSET, }; use crate::search_queue::SearchQueue; @@ -53,13 +56,110 @@ pub struct FacetSearchQuery { pub locales: Option>, } +#[derive(Default)] +pub struct FacetSearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap, + + // The set of all facetNames that were used + facet_names: HashSet, + + // As there been any other parameter than the facetName or facetQuery ones? + additional_search_parameters_provided: bool, +} + +impl FacetSearchAggregator { + #[allow(clippy::field_reassign_with_default)] + pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { + let FacetSearchQuery { + facet_query: _, + facet_name, + vector, + q, + filter, + matching_strategy, + attributes_to_search_on, + hybrid, + ranking_score_threshold, + locales, + } = query; + + Self { + total_received: 1, + facet_names: Some(facet_name.clone()).into_iter().collect(), + additional_search_parameters_provided: q.is_some() + || vector.is_some() + || filter.is_some() + || *matching_strategy != MatchingStrategy::default() + || attributes_to_search_on.is_some() + || hybrid.is_some() + || ranking_score_threshold.is_some() + || locales.is_some(), + ..Default::default() + } + } + + pub fn succeed(&mut self, result: &FacetSearchResult) { + let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result; + self.total_succeeded = 1; + self.time_spent.push(*processing_time_ms as usize); + } +} + +impl Aggregate for FacetSearchAggregator { + fn event_name(&self) -> &'static str { 
+ "Facet Searched POST" + } + + fn aggregate(mut self, other: Self) -> Self + where + Self: Sized, + { + self.time_spent.insert(other.time_spent); + + Self { + total_received: self.total_received.saturating_add(other.total_received), + total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded), + time_spent: self.time_spent, + facet_names: self.facet_names.union(&other.facet_names).collect(), + additional_search_parameters_provided: self.additional_search_parameters_provided + | other.additional_search_parameters_provided, + } + } + + fn into_event(self) -> Value { + let Self { + total_received, + total_succeeded, + time_spent, + facet_names, + additional_search_parameters_provided, + } = self; + + serde_json::json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + }, + "facets": { + "total_distinct_facet_count": facet_names.len(), + "additional_search_parameters_provided": additional_search_parameters_provided, + }, + }) + } +} + pub async fn search( index_scheduler: GuardedData, Data>, search_queue: Data, index_uid: web::Path, params: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -100,7 +200,7 @@ pub async fn search( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.post_facet_search(aggregate); + analytics.publish(aggregate, Some(&req)); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 35b747ccf..483a48a16 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeSet; use std::convert::Infallible; use actix_web::web::Data; @@ 
-18,7 +19,7 @@ use time::OffsetDateTime; use tracing::debug; use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; @@ -123,12 +124,34 @@ pub struct IndexCreateRequest { primary_key: Option, } +#[derive(Serialize)] +struct IndexCreatedAggregate { + primary_key: BTreeSet, +} + +impl Aggregate for IndexCreatedAggregate { + fn event_name(&self) -> &'static str { + "Index Created" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { primary_key: self.primary_key.union(&other.primary_key).collect() } + } + + fn into_event(self) -> impl Serialize { + self + } +} + pub async fn create_index( index_scheduler: GuardedData, Data>, body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Create index"); let IndexCreateRequest { primary_key, uid } = body.into_inner(); @@ -136,8 +159,7 @@ pub async fn create_index( let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); if allow_index_creation { analytics.publish( - "Index Created".to_string(), - json!({ "primary_key": primary_key }), + IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() }, Some(&req), ); @@ -194,20 +216,37 @@ pub async fn get_index( Ok(HttpResponse::Ok().json(index_view)) } +#[derive(Serialize)] +struct IndexUpdatedAggregate { + primary_key: BTreeSet, +} + +impl Aggregate for IndexUpdatedAggregate { + fn event_name(&self) -> &'static str { + "Index Updated" + } + + fn aggregate(self, other: Self) -> Self { + Self { primary_key: self.primary_key.union(&other.primary_key).collect() } + } + + fn into_event(self) -> impl Serialize { + self + } +} pub async fn 
update_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Update index"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); analytics.publish( - "Index Updated".to_string(), - json!({ "primary_key": body.primary_key }), + IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() }, Some(&req), ); diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 6a8eee521..f833a57d2 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -13,6 +13,7 @@ use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use tracing::debug; +use crate::analytics::segment_analytics::{SearchGET, SearchPOST}; use crate::analytics::{Analytics, SearchAggregator}; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; @@ -225,7 +226,7 @@ pub async fn search_with_url_query( index_uid: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -237,7 +238,7 @@ pub async fn search_with_url_query( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query, &req); let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); @@ -254,7 +255,7 @@ pub async fn search_with_url_query( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.get_search(aggregate); + analytics.publish(aggregate, Some(&req)); let search_result = search_result?; @@ -268,7 +269,7 @@ pub async fn search_with_post( index_uid: 
web::Path, params: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -280,7 +281,7 @@ pub async fn search_with_post( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query, &req); let index = index_scheduler.index(&index_uid)?; @@ -302,7 +303,7 @@ pub async fn search_with_post( MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); } } - analytics.post_search(aggregate); + analytics.publish(aggregate, Some(&req)); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index aaf8673d0..112f8671b 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -1,3 +1,5 @@ +use std::collections::{BTreeSet, HashSet}; + use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -7,12 +9,15 @@ use meilisearch_types::error::ResponseError; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::Setting; -use meilisearch_types::settings::{settings, RankingRuleView, SecretPolicy, Settings, Unchecked}; +use meilisearch_types::settings::{ + settings, ProximityPrecisionView, RankingRuleView, SecretPolicy, Settings, Unchecked, +}; use meilisearch_types::tasks::KindWithContent; +use serde::Serialize; use serde_json::json; use tracing::debug; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; @@ -80,7 +85,7 @@ macro_rules! 
make_setting_route { body: deserr::actix_web::AwebJson, $err_ty>, req: HttpRequest, opt: web::Data, - $analytics_var: web::Data, + $analytics_var: web::Data, ) -> std::result::Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -162,16 +167,8 @@ make_setting_route!( "filterableAttributes", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "FilterableAttributes Updated".to_string(), - json!({ - "filterable_attributes": { - "total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0), - "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), - } - }), + crate::routes::indexes::settings::FilterableAttributesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -188,16 +185,8 @@ make_setting_route!( "sortableAttributes", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "SortableAttributes Updated".to_string(), - json!({ - "sortable_attributes": { - "total": setting.as_ref().map(|sort| sort.len()), - "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")), - }, - }), + crate::routes::indexes::settings::SortableAttributesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -214,16 +203,8 @@ make_setting_route!( "displayedAttributes", analytics, |displayed: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "DisplayedAttributes Updated".to_string(), - json!({ - "displayed_attributes": { - "total": displayed.as_ref().map(|displayed| displayed.len()), - "with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - }, - }), + crate::routes::indexes::settings::DisplayedAttributesAnalytics::new(displayed.as_ref()).to_settings(), Some(req), ); } @@ -240,35 +221,8 @@ make_setting_route!( "typoTolerance", analytics, |setting: &Option, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "TypoTolerance 
Updated".to_string(), - json!({ - "typo_tolerance": { - "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), - "disable_on_attributes": setting - .as_ref() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - "disable_on_words": setting - .as_ref() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - "min_word_size_for_one_typo": setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.one_typo.set())) - .flatten(), - "min_word_size_for_two_typos": setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.two_typos.set())) - .flatten(), - }, - }), + crate::routes::indexes::settings::TypoToleranceAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -285,16 +239,8 @@ make_setting_route!( "searchableAttributes", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "SearchableAttributes Updated".to_string(), - json!({ - "searchable_attributes": { - "total": setting.as_ref().map(|searchable| searchable.len()), - "with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - }, - }), + crate::routes::indexes::settings::SearchableAttributesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -311,15 +257,8 @@ make_setting_route!( "stopWords", analytics, |stop_words: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "StopWords Updated".to_string(), - json!({ - "stop_words": { - "total": stop_words.as_ref().map(|stop_words| stop_words.len()), - }, - }), + crate::routes::indexes::settings::StopWordsAnalytics::new(stop_words.as_ref()).to_settings(), Some(req), ); } @@ -336,15 +275,8 @@ make_setting_route!( "nonSeparatorTokens", analytics, |non_separator_tokens: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "nonSeparatorTokens 
Updated".to_string(), - json!({ - "non_separator_tokens": { - "total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()), - }, - }), + crate::routes::indexes::settings::NonSeparatorTokensAnalytics::new(non_separator_tokens.as_ref()).to_settings(), Some(req), ); } @@ -361,15 +293,8 @@ make_setting_route!( "separatorTokens", analytics, |separator_tokens: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "separatorTokens Updated".to_string(), - json!({ - "separator_tokens": { - "total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()), - }, - }), + crate::routes::indexes::settings::SeparatorTokensAnalytics::new(separator_tokens.as_ref()).to_settings(), Some(req), ); } @@ -386,15 +311,8 @@ make_setting_route!( "dictionary", analytics, |dictionary: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "dictionary Updated".to_string(), - json!({ - "dictionary": { - "total": dictionary.as_ref().map(|dictionary| dictionary.len()), - }, - }), + crate::routes::indexes::settings::DictionaryAnalytics::new(dictionary.as_ref()).to_settings(), Some(req), ); } @@ -411,15 +329,8 @@ make_setting_route!( "synonyms", analytics, |synonyms: &Option>>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "Synonyms Updated".to_string(), - json!({ - "synonyms": { - "total": synonyms.as_ref().map(|synonyms| synonyms.len()), - }, - }), + crate::routes::indexes::settings::SynonymsAnalytics::new(synonyms.as_ref()).to_settings(), Some(req), ); } @@ -436,14 +347,8 @@ make_setting_route!( "distinctAttribute", analytics, |distinct: &Option, req: &HttpRequest| { - use serde_json::json; analytics.publish( - "DistinctAttribute Updated".to_string(), - json!({ - "distinct_attribute": { - "set": distinct.is_some(), - } - }), + crate::routes::indexes::settings::DistinctAttributeAnalytics::new(distinct.as_ref()).to_settings(), Some(req), ); } @@ -460,15 +365,8 @@ 
make_setting_route!( "proximityPrecision", analytics, |precision: &Option, req: &HttpRequest| { - use serde_json::json; analytics.publish( - "ProximityPrecision Updated".to_string(), - json!({ - "proximity_precision": { - "set": precision.is_some(), - "value": precision.unwrap_or_default(), - } - }), + crate::routes::indexes::settings::ProximityPrecisionAnalytics::new(precision.as_ref()).to_settings(), Some(req), ); } @@ -485,12 +383,8 @@ make_setting_route!( "localizedAttributes", analytics, |rules: &Option>, req: &HttpRequest| { - use serde_json::json; analytics.publish( - "LocalizedAttributesRules Updated".to_string(), - json!({ - "locales": rules.as_ref().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::>()) - }), + crate::routes::indexes::settings::LocalesAnalytics::new(rules.as_ref()).to_settings(), Some(req), ); } @@ -507,21 +401,8 @@ make_setting_route!( "rankingRules", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "RankingRules Updated".to_string(), - json!({ - "ranking_rules": { - "words_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words))), - "typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))), - "proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Proximity))), - "attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Attribute))), - "sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))), - "exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Exactness))), - "values": setting.as_ref().map(|rr| rr.iter().filter(|s| 
matches!(s, meilisearch_types::settings::RankingRuleView::Asc(_) | meilisearch_types::settings::RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::>().join(", ")), - } - }), + crate::routes::indexes::settings::RankingRulesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -538,20 +419,8 @@ make_setting_route!( "faceting", analytics, |setting: &Option, req: &HttpRequest| { - use serde_json::json; - use meilisearch_types::facet_values_sort::FacetValuesSort; - analytics.publish( - "Faceting Updated".to_string(), - json!({ - "faceting": { - "max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()), - "sort_facet_values_by_star_count": setting.as_ref().and_then(|s| { - s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - "sort_facet_values_by_total": setting.as_ref().and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - }, - }), + crate::routes::indexes::settings::FacetingAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -568,15 +437,8 @@ make_setting_route!( "pagination", analytics, |setting: &Option, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "Pagination Updated".to_string(), - json!({ - "pagination": { - "max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()), - }, - }), + crate::routes::indexes::settings::PaginationAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -593,11 +455,8 @@ make_setting_route!( "embedders", analytics, |setting: &Option>>, req: &HttpRequest| { - - analytics.publish( - "Embedders Updated".to_string(), - serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}), + crate::routes::indexes::settings::EmbeddersAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -651,10 +510,15 @@ fn embedder_analytics( json!( { + // last "total": setting.as_ref().map(|s| s.len()), + // Merge the 
sources "sources": sources, + // |= "document_template_used": document_template_used, + // max "document_template_max_bytes": document_template_max_bytes, + // |= "binary_quantization_used": binary_quantization_used, } ) @@ -672,8 +536,7 @@ make_setting_route!( analytics, |setting: &Option, req: &HttpRequest| { analytics.publish( - "Search Cutoff Updated".to_string(), - serde_json::json!({"search_cutoff_ms": setting }), + crate::routes::indexes::settings::SearchCutoffMsAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -714,13 +577,639 @@ generate_configure!( search_cutoff_ms ); +#[derive(Serialize, Default)] +struct SettingsAnalytics { + ranking_rules: RankingRulesAnalytics, + searchable_attributes: SearchableAttributesAnalytics, + displayed_attributes: DisplayedAttributesAnalytics, + sortable_attributes: SortableAttributesAnalytics, + filterable_attributes: FilterableAttributesAnalytics, + distinct_attribute: DistinctAttributeAnalytics, + proximity_precision: ProximityPrecisionAnalytics, + typo_tolerance: TypoToleranceAnalytics, + faceting: FacetingAnalytics, + pagination: PaginationAnalytics, + stop_words: StopWordsAnalytics, + synonyms: SynonymsAnalytics, + embedders: EmbeddersAnalytics, + search_cutoff_ms: SearchCutoffMsAnalytics, + locales: LocalesAnalytics, + dictionary: DictionaryAnalytics, + separator_tokens: SeparatorTokensAnalytics, + non_separator_tokens: NonSeparatorTokensAnalytics, +} + +impl Aggregate for SettingsAnalytics { + fn event_name(&self) -> &'static str { + "Settings Updated" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + ranking_rules: RankingRulesAnalytics { + words_position: self + .ranking_rules + .words_position + .or(other.ranking_rules.words_position), + typo_position: self + .ranking_rules + .typo_position + .or(other.ranking_rules.typo_position), + proximity_position: self + .ranking_rules + .proximity_position + .or(other.ranking_rules.proximity_position), + 
attribute_position: self + .ranking_rules + .attribute_position + .or(other.ranking_rules.attribute_position), + sort_position: self + .ranking_rules + .sort_position + .or(other.ranking_rules.sort_position), + exactness_position: self + .ranking_rules + .exactness_position + .or(other.ranking_rules.exactness_position), + values: self.ranking_rules.values.or(other.ranking_rules.values), + }, + searchable_attributes: SearchableAttributesAnalytics { + total: self.searchable_attributes.total.or(other.searchable_attributes.total), + with_wildcard: self + .searchable_attributes + .with_wildcard + .or(other.searchable_attributes.with_wildcard), + }, + displayed_attributes: DisplayedAttributesAnalytics { + total: self.displayed_attributes.total.or(other.displayed_attributes.total), + with_wildcard: self + .displayed_attributes + .with_wildcard + .or(other.displayed_attributes.with_wildcard), + }, + sortable_attributes: SortableAttributesAnalytics { + total: self.sortable_attributes.total.or(other.sortable_attributes.total), + has_geo: self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), + }, + filterable_attributes: FilterableAttributesAnalytics { + total: self.filterable_attributes.total.or(other.filterable_attributes.total), + has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), + }, + distinct_attribute: DistinctAttributeAnalytics { + set: self.distinct_attribute.set.or(other.distinct_attribute.set), + }, + proximity_precision: ProximityPrecisionAnalytics { + set: self.proximity_precision.set(other.proximity_precision.set), + value: self.proximity_precision.value(other.proximity_precision.value), + }, + typo_tolerance: TypoToleranceAnalytics { + enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), + disable_on_attributes: self + .typo_tolerance + .disable_on_attributes + .or(other.typo_tolerance.disable_on_attributes), + disable_on_words: self + .typo_tolerance + .disable_on_words + 
.or(other.typo_tolerance.disable_on_words), + min_word_size_for_one_typo: self + .typo_tolerance + .min_word_size_for_one_typo + .or(other.typo_tolerance.min_word_size_for_one_typo), + min_word_size_for_two_typos: self + .typo_tolerance + .min_word_size_for_two_typos + .or(other.typo_tolerance.min_word_size_for_two_typos), + }, + faceting: FacetingAnalytics { + max_values_per_facet: self + .faceting + .max_values_per_facet + .or(other.faceting.max_values_per_facet), + sort_facet_values_by_star_count: self + .faceting + .sort_facet_values_by_star_count + .or(other.faceting.sort_facet_values_by_star_count), + sort_facet_values_by_total: self + .faceting + .sort_facet_values_by_total + .or(other.faceting.sort_facet_values_by_total), + }, + pagination: PaginationAnalytics { + max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), + }, + stop_words: StopWordsAnalytics { + total: self.stop_words.total.or(other.stop_words.total), + }, + synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, + embedders: EmbeddersAnalytics { + total: self.embedders.total.or(other.embedders.total), + sources: match (self.embedders.sources, other.embedders.sources) { + (None, None) => None, + (Some(sources), None) | (None, Some(sources)) => Some(sources), + (Some(this), Some(other)) => Some(this.union(&other).collect()), + }, + document_template_used: match ( + self.embedders.document_template_used, + other.embedders.document_template_used, + ) { + (None, None) => None, + (Some(used), None) | (None, Some(used)) => Some(used), + (Some(this), Some(other)) => Some(this | other), + }, + document_template_max_bytes: match ( + self.embedders.document_template_max_bytes, + other.embedders.document_template_max_bytes, + ) { + (None, None) => None, + (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), + (Some(this), Some(other)) => Some(this.max(other)), + }, + binary_quantization_used: match ( + self.embedders.binary_quantization_used, 
+ other.embedders.binary_quantization_used, + ) { + (None, None) => None, + (Some(bq), None) | (None, Some(bq)) => Some(bq), + (Some(this), Some(other)) => Some(this | other), + }, + }, + search_cutoff_ms: SearchCutoffMsAnalytics { + search_cutoff_ms: self + .search_cutoff_ms + .search_cutoff_ms + .or(other.search_cutoff_ms.search_cutoff_ms), + }, + locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) }, + dictionary: DictionaryAnalytics { + total: self.dictionary.total.or(other.dictionary.total), + }, + separator_tokens: SeparatorTokensAnalytics { + total: self.separator_tokens.total.or(other.non_separator_tokens.total), + }, + non_separator_tokens: NonSeparatorTokensAnalytics { + total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), + }, + } + } + + fn into_event(self) -> impl Serialize + where + Self: Sized, + { + self + } +} + +#[derive(Serialize, Default)] +struct RankingRulesAnalytics { + words_position: Option, + typo_position: Option, + proximity_position: Option, + attribute_position: Option, + sort_position: Option, + exactness_position: Option, + values: Option, +} + +impl RankingRulesAnalytics { + pub fn new(rr: Option<&Vec>) -> Self { + RankingRulesAnalytics { + words_position: rr.as_ref().map(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) + }), + typo_position: rr.as_ref().map(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) + }), + proximity_position: rr.as_ref().map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) + }) + }), + attribute_position: rr.as_ref().map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) + }) + }), + sort_position: rr.as_ref().map(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) + }), + exactness_position: 
rr.as_ref().map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) + }) + }), + values: rr.as_ref().map(|rr| { + rr.iter() + .filter(|s| { + matches!( + s, + meilisearch_types::settings::RankingRuleView::Asc(_) + | meilisearch_types::settings::RankingRuleView::Desc(_) + ) + }) + .map(|x| x.to_string()) + .collect::>() + .join(", ") + }), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { ranking_rules: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct SearchableAttributesAnalytics { + total: Option, + with_wildcard: bool, +} + +impl SearchableAttributesAnalytics { + pub fn new(setting: Option<&Vec>) -> Self { + Self { + total: setting.as_ref().map(|searchable| searchable.len()), + with_wildcard: setting + .as_ref() + .map(|searchable| searchable.iter().any(|searchable| searchable == "*")), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { searchable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct DisplayedAttributesAnalytics { + total: usize, + with_wildcard: bool, +} + +impl DisplayedAttributesAnalytics { + pub fn new(displayed: Option<&Vec>) -> Self { + Self { + total: displayed.as_ref().map(|displayed| displayed.len()), + with_wildcard: displayed + .as_ref() + .map(|displayed| displayed.iter().any(|displayed| displayed == "*")), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { displayed_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct SortableAttributesAnalytics { + total: usize, + has_geo: bool, +} + +impl SortableAttributesAnalytics { + pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|sort| sort.len()), + has_geo: setting.as_ref().map(|sort| sort.contains("_geo")), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { 
sortable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct FilterableAttributesAnalytics { + total: usize, + has_geo: bool, +} + +impl FilterableAttributesAnalytics { + pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|filter| filter.len()).unwrap_or(0), + has_geo: setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { filterable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct DistinctAttributeAnalytics { + set: bool, +} + +impl DistinctAttributeAnalytics { + pub fn new(distinct: Option<&String>) -> Self { + Self { set: distinct.is_some() } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { distinct_attribute: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct ProximityPrecisionAnalytics { + set: bool, + value: Option, +} + +impl ProximityPrecisionAnalytics { + pub fn new(precision: Option<&meilisearch_types::settings::ProximityPrecisionView>) -> Self { + Self { set: precision.is_some(), value: precision.unwrap_or_default() } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { proximity_precision: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct TypoToleranceAnalytics { + enabled: Option, + disable_on_attributes: Option, + disable_on_words: Option, + min_word_size_for_one_typo: Option, + min_word_size_for_two_typos: Option, +} + +impl TypoToleranceAnalytics { + pub fn new(setting: Option<&meilisearch_types::settings::TypoSettings>) -> Self { + Self { + enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), + disable_on_attributes: setting + .as_ref() + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), + disable_on_words: setting + .as_ref() + .and_then(|s| 
s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), + min_word_size_for_one_typo: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set())) + .flatten(), + min_word_size_for_two_typos: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set())) + .flatten(), + } + } + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { typo_tolerance: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct FacetingAnalytics { + max_values_per_facet: Option, + sort_facet_values_by_star_count: Option, + sort_facet_values_by_total: Option, +} + +impl FacetingAnalytics { + pub fn new(setting: Option<&meilisearch_types::settings::FacetingSettings>) -> Self { + Self { + max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()), + sort_facet_values_by_star_count: setting.as_ref().and_then(|s| { + s.sort_facet_values_by + .as_ref() + .set() + .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) + }), + sort_facet_values_by_total: setting + .as_ref() + .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { faceting: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct PaginationAnalytics { + max_total_hits: Option, +} + +impl PaginationAnalytics { + pub fn new(setting: Option<&meilisearch_types::settings::PaginationSettings>) -> Self { + Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { pagination: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct StopWordsAnalytics { + total: Option, +} + +impl StopWordsAnalytics { + pub fn new(stop_words: Option<&BTreeSet>) -> Self { + Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } + } + + pub fn 
to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { stop_words: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct SynonymsAnalytics { + total: Option, +} + +impl SynonymsAnalytics { + pub fn new(synonyms: Option<&std::collections::BTreeMap>>) -> Self { + Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { synonyms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct EmbeddersAnalytics { + // last + total: Option, + // Merge the sources + sources: Option>, + // |= + document_template_used: Option, + // max + document_template_max_bytes: Option, + // |= + binary_quantization_used: Option, +} + +impl EmbeddersAnalytics { + pub fn new( + setting: Option< + &std::collections::BTreeMap< + String, + Setting, + >, + >, + ) -> Self { + let mut sources = std::collections::HashSet::new(); + + if let Some(s) = &setting { + for source in s + .values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.source.set()) + { + use meilisearch_types::milli::vector::settings::EmbedderSource; + match source { + EmbedderSource::OpenAi => sources.insert("openAi"), + EmbedderSource::HuggingFace => sources.insert("huggingFace"), + EmbedderSource::UserProvided => sources.insert("userProvided"), + EmbedderSource::Ollama => sources.insert("ollama"), + EmbedderSource::Rest => sources.insert("rest"), + }; + } + }; + + Self { + total: setting.as_ref().map(|s| s.len()), + sources, + document_template_used: setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.document_template.set().is_some()) + }), + document_template_max_bytes: setting.as_ref().and_then(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.document_template_max_bytes.set()) + .max() + }), + binary_quantization_used: setting.as_ref().map(|map| { + 
map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.binary_quantized.set().is_some()) + }), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { embedders: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +struct SearchCutoffMsAnalytics { + search_cutoff_ms: Option, +} + +impl SearchCutoffMsAnalytics { + pub fn new(setting: Option<&u64>) -> Self { + Self { search_cutoff_ms: setting } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +struct LocalesAnalytics { + locales: BTreeSet, +} + +impl LocalesAnalytics { + pub fn new( + rules: Option<&Vec>, + ) -> Self { + LocalesAnalytics { + locales: rules.as_ref().map(|rules| { + rules + .iter() + .flat_map(|rule| rule.locales.iter().cloned()) + .collect::>() + }), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { locales: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct DictionaryAnalytics { + total: usize, +} + +impl DictionaryAnalytics { + pub fn new(dictionary: Option<&std::collections::BTreeSet>) -> Self { + Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { dictionary: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct SeparatorTokensAnalytics { + total: usize, +} + +impl SeparatorTokensAnalytics { + pub fn new(separator_tokens: Option<&std::collections::BTreeSet>) -> Self { + Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { separator_tokens: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct NonSeparatorTokensAnalytics { + total: usize, +} + +impl 
NonSeparatorTokensAnalytics { + pub fn new(non_separator_tokens: Option<&std::collections::BTreeSet>) -> Self { + Self { + total: non_separator_tokens + .as_ref() + .map(|non_separator_tokens| non_separator_tokens.len()), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { non_separator_tokens: self, ..Default::default() } + } +} + pub async fn update_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, body: AwebJson, DeserrJsonError>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -729,103 +1218,44 @@ pub async fn update_all( let new_settings = validate_settings(new_settings, &index_scheduler)?; analytics.publish( - "Settings Updated".to_string(), - json!({ - "ranking_rules": { - "words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Words))), - "typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Typo))), - "proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Proximity))), - "attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Attribute))), - "sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Sort))), - "exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Exactness))), - "values": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !matches!(s, RankingRuleView::Asc(_) | RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::>().join(", ")), - }, - "searchable_attributes": { - "total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), - 
"with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - }, - "displayed_attributes": { - "total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()), - "with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - }, - "sortable_attributes": { - "total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), - "has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), - }, - "filterable_attributes": { - "total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), - "has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), - }, - "distinct_attribute": { - "set": new_settings.distinct_attribute.as_ref().set().is_some() - }, - "proximity_precision": { - "set": new_settings.proximity_precision.as_ref().set().is_some(), - "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default() - }, - "typo_tolerance": { - "enabled": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.enabled.as_ref().set()) - .copied(), - "disable_on_attributes": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - "disable_on_words": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - "min_word_size_for_one_typo": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.one_typo.set())) - .flatten(), - "min_word_size_for_two_typos": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.two_typos.set())) - .flatten(), - }, - "faceting": { - 
"max_values_per_facet": new_settings.faceting - .as_ref() - .set() - .and_then(|s| s.max_values_per_facet.as_ref().set()), - "sort_facet_values_by_star_count": new_settings.faceting - .as_ref() - .set() - .and_then(|s| { - s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - "sort_facet_values_by_total": new_settings.faceting - .as_ref() - .set() - .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - }, - "pagination": { - "max_total_hits": new_settings.pagination - .as_ref() - .set() - .and_then(|s| s.max_total_hits.as_ref().set()), - }, - "stop_words": { - "total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()), - }, - "synonyms": { - "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), - }, - "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()), - "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(), - "locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::>()), - }), + SettingsAnalytics { + ranking_rules: RankingRulesAnalytics::new(new_settings.ranking_rules.as_ref().set()), + searchable_attributes: SearchableAttributesAnalytics::new( + new_settings.searchable_attributes.as_ref().set(), + ), + displayed_attributes: DisplayedAttributesAnalytics::new( + new_settings.displayed_attributes.as_ref().set(), + ), + sortable_attributes: SortableAttributesAnalytics::new( + new_settings.sortable_attributes.as_ref().set(), + ), + filterable_attributes: FilterableAttributesAnalytics::new( + new_settings.filterable_attributes.as_ref().set(), + ), + distinct_attribute: DistinctAttributeAnalytics::new( + new_settings.distinct_attribute.as_ref().set(), + ), + proximity_precision: ProximityPrecisionAnalytics::new( + new_settings.proximity_precision.as_ref().set(), + ), + typo_tolerance: 
TypoToleranceAnalytics::new(new_settings.typo_tolerance.as_ref().set()), + faceting: FacetingAnalytics::new(new_settings.faceting.as_ref().set()), + pagination: PaginationAnalytics::new(new_settings.pagination.as_ref().set()), + stop_words: StopWordsAnalytics::new(new_settings.stop_words.as_ref().set()), + synonyms: SynonymsAnalytics::new(new_settings.synonyms.as_ref().set()), + embedders: EmbeddersAnalytics::new(new_settings.embedders.as_ref().set()), + search_cutoff_ms: SearchCutoffMsAnalytics::new( + new_settings.search_cutoff_ms.as_ref().set(), + ), + locales: LocalesAnalytics::new(new_settings.localized_attributes.as_ref().set()), + dictionary: DictionaryAnalytics::new(new_settings.dictionary.as_ref().set()), + separator_tokens: SeparatorTokensAnalytics::new( + new_settings.separator_tokens.as_ref().set(), + ), + non_separator_tokens: NonSeparatorTokensAnalytics::new( + new_settings.non_separator_tokens.as_ref().set(), + ), + }, Some(&req), ); diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 51a7b0707..34e904230 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -40,7 +40,7 @@ pub async fn swap_indexes( analytics.publish( "Indexes Swapped".to_string(), json!({ - "swap_operation_number": params.len(), + "swap_operation_number": params.len(), // Return the max ever encountered }), Some(&req), ); From e66fccc3f2e8c9ef9f576f9484d1135bf02716e6 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 16 Oct 2024 15:51:48 +0200 Subject: [PATCH 52/92] get rids of the analytics closure --- meilisearch/src/routes/indexes/settings.rs | 216 +++------------------ 1 file changed, 24 insertions(+), 192 deletions(-) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 112f8671b..db83cb39b 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -14,7 +14,6 @@ use 
meilisearch_types::settings::{ }; use meilisearch_types::tasks::KindWithContent; use serde::Serialize; -use serde_json::json; use tracing::debug; use crate::analytics::{Aggregate, Analytics}; @@ -25,7 +24,7 @@ use crate::Opt; #[macro_export] macro_rules! make_setting_route { - ($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => { + ($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident) => { pub mod $attr { use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse, Resource}; @@ -85,7 +84,7 @@ macro_rules! make_setting_route { body: deserr::actix_web::AwebJson, $err_ty>, req: HttpRequest, opt: web::Data, - $analytics_var: web::Data, + analytics: web::Data, ) -> std::result::Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -93,7 +92,10 @@ macro_rules! make_setting_route { debug!(parameters = ?body, "Update settings"); #[allow(clippy::redundant_closure_call)] - $analytics(&body, &req); + analytics.publish( + $crate::routes::indexes::settings::$analytics::new(body.as_ref()).to_settings(), + Some(&req), + ); let new_settings = Settings { $attr: match body { @@ -165,13 +167,7 @@ make_setting_route!( >, filterable_attributes, "filterableAttributes", - analytics, - |setting: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::FilterableAttributesAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + FilterableAttributesAnalytics ); make_setting_route!( @@ -183,13 +179,7 @@ make_setting_route!( >, sortable_attributes, "sortableAttributes", - analytics, - |setting: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SortableAttributesAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + SortableAttributesAnalytics ); make_setting_route!( @@ -201,13 +191,7 @@ make_setting_route!( >, 
displayed_attributes, "displayedAttributes", - analytics, - |displayed: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::DisplayedAttributesAnalytics::new(displayed.as_ref()).to_settings(), - Some(req), - ); - } + DisplayedAttributesAnalytics ); make_setting_route!( @@ -219,13 +203,7 @@ make_setting_route!( >, typo_tolerance, "typoTolerance", - analytics, - |setting: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::TypoToleranceAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + TypoToleranceAnalytics ); make_setting_route!( @@ -237,13 +215,7 @@ make_setting_route!( >, searchable_attributes, "searchableAttributes", - analytics, - |setting: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SearchableAttributesAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + SearchableAttributesAnalytics ); make_setting_route!( @@ -255,13 +227,7 @@ make_setting_route!( >, stop_words, "stopWords", - analytics, - |stop_words: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::StopWordsAnalytics::new(stop_words.as_ref()).to_settings(), - Some(req), - ); - } + StopWordsAnalytics ); make_setting_route!( @@ -273,13 +239,7 @@ make_setting_route!( >, non_separator_tokens, "nonSeparatorTokens", - analytics, - |non_separator_tokens: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::NonSeparatorTokensAnalytics::new(non_separator_tokens.as_ref()).to_settings(), - Some(req), - ); - } + NonSeparatorTokensAnalytics ); make_setting_route!( @@ -291,13 +251,7 @@ make_setting_route!( >, separator_tokens, "separatorTokens", - analytics, - |separator_tokens: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SeparatorTokensAnalytics::new(separator_tokens.as_ref()).to_settings(), - Some(req), - ); - } + SeparatorTokensAnalytics ); 
make_setting_route!( @@ -309,13 +263,7 @@ make_setting_route!( >, dictionary, "dictionary", - analytics, - |dictionary: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::DictionaryAnalytics::new(dictionary.as_ref()).to_settings(), - Some(req), - ); - } + DictionaryAnalytics ); make_setting_route!( @@ -327,13 +275,7 @@ make_setting_route!( >, synonyms, "synonyms", - analytics, - |synonyms: &Option>>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SynonymsAnalytics::new(synonyms.as_ref()).to_settings(), - Some(req), - ); - } + SynonymsAnalytics ); make_setting_route!( @@ -345,13 +287,7 @@ make_setting_route!( >, distinct_attribute, "distinctAttribute", - analytics, - |distinct: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::DistinctAttributeAnalytics::new(distinct.as_ref()).to_settings(), - Some(req), - ); - } + DistinctAttributeAnalytics ); make_setting_route!( @@ -363,13 +299,7 @@ make_setting_route!( >, proximity_precision, "proximityPrecision", - analytics, - |precision: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::ProximityPrecisionAnalytics::new(precision.as_ref()).to_settings(), - Some(req), - ); - } + ProximityPrecisionAnalytics ); make_setting_route!( @@ -381,13 +311,7 @@ make_setting_route!( >, localized_attributes, "localizedAttributes", - analytics, - |rules: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::LocalesAnalytics::new(rules.as_ref()).to_settings(), - Some(req), - ); - } + LocalesAnalytics ); make_setting_route!( @@ -399,13 +323,7 @@ make_setting_route!( >, ranking_rules, "rankingRules", - analytics, - |setting: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::RankingRulesAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + RankingRulesAnalytics ); make_setting_route!( @@ -417,13 +335,7 @@ 
make_setting_route!( >, faceting, "faceting", - analytics, - |setting: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::FacetingAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + FacetingAnalytics ); make_setting_route!( @@ -435,13 +347,7 @@ make_setting_route!( >, pagination, "pagination", - analytics, - |setting: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::PaginationAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + PaginationAnalytics ); make_setting_route!( @@ -453,77 +359,9 @@ make_setting_route!( >, embedders, "embedders", - analytics, - |setting: &Option>>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::EmbeddersAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + EmbeddersAnalytics ); -fn embedder_analytics( - setting: Option< - &std::collections::BTreeMap< - String, - Setting, - >, - >, -) -> serde_json::Value { - let mut sources = std::collections::HashSet::new(); - - if let Some(s) = &setting { - for source in s - .values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.source.set()) - { - use meilisearch_types::milli::vector::settings::EmbedderSource; - match source { - EmbedderSource::OpenAi => sources.insert("openAi"), - EmbedderSource::HuggingFace => sources.insert("huggingFace"), - EmbedderSource::UserProvided => sources.insert("userProvided"), - EmbedderSource::Ollama => sources.insert("ollama"), - EmbedderSource::Rest => sources.insert("rest"), - }; - } - }; - - let document_template_used = setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.document_template.set().is_some()) - }); - - let document_template_max_bytes = setting.as_ref().and_then(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.document_template_max_bytes.set()) - .max() - }); - 
- let binary_quantization_used = setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.binary_quantized.set().is_some()) - }); - - json!( - { - // last - "total": setting.as_ref().map(|s| s.len()), - // Merge the sources - "sources": sources, - // |= - "document_template_used": document_template_used, - // max - "document_template_max_bytes": document_template_max_bytes, - // |= - "binary_quantization_used": binary_quantization_used, - } - ) -} - make_setting_route!( "/search-cutoff-ms", put, @@ -533,13 +371,7 @@ make_setting_route!( >, search_cutoff_ms, "searchCutoffMs", - analytics, - |setting: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SearchCutoffMsAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + SearchCutoffMsAnalytics ); macro_rules! generate_configure { From fdeb47fb549a242d318a17195e1a804e50aef5dd Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 16 Oct 2024 17:16:33 +0200 Subject: [PATCH 53/92] implements all routes --- meilisearch/src/analytics/mod.rs | 14 +- .../src/analytics/segment_analytics.rs | 239 +++++++----------- meilisearch/src/routes/dump.rs | 2 +- meilisearch/src/routes/features.rs | 8 +- meilisearch/src/routes/indexes/documents.rs | 20 +- .../src/routes/indexes/facet_search.rs | 2 +- meilisearch/src/routes/indexes/mod.rs | 4 +- meilisearch/src/routes/indexes/search.rs | 4 +- meilisearch/src/routes/indexes/settings.rs | 152 ++++++----- meilisearch/src/routes/indexes/similar.rs | 13 +- meilisearch/src/routes/multi_search.rs | 6 +- meilisearch/src/routes/snapshot.rs | 7 +- meilisearch/src/routes/swap_indexes.rs | 32 ++- meilisearch/src/routes/tasks.rs | 129 +++++++--- 14 files changed, 337 insertions(+), 295 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index a8658d830..a0ca47d8f 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -16,7 +16,9 
@@ use serde::Serialize; // if the feature analytics is enabled we use the real analytics pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; pub use segment_analytics::SearchAggregator; -pub type SimilarAggregator = segment_analytics::SimilarAggregator; +pub use segment_analytics::SimilarAggregator; + +use self::segment_analytics::extract_user_agents; pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; @@ -32,14 +34,11 @@ macro_rules! empty_analytics { $event_name } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { + fn aggregate(self, _other: Self) -> Self { self } - fn into_event(self) -> serde_json::Value { + fn into_event(self) -> impl serde::Serialize { serde_json::json!({}) } } @@ -150,7 +149,8 @@ impl Analytics { } /// The method used to publish most analytics that do not need to be batched every hours - pub fn publish(&self, send: impl Aggregate, request: Option<&HttpRequest>) { + pub fn publish(&self, send: impl Aggregate, request: &HttpRequest) { let Some(segment) = self.inner else { return }; + let user_agents = extract_user_agents(request); } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 8a6dfd780..0572267e1 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -71,25 +71,8 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec { .collect() } -pub enum AnalyticsMsg { - BatchMessage(Track), - AggregateGetSearch(SearchAggregator), - AggregatePostSearch(SearchAggregator), - AggregateGetSimilar(SimilarAggregator), - AggregatePostSimilar(SimilarAggregator), - AggregatePostMultiSearch(MultiSearchAggregator), - AggregatePostFacetSearch(FacetSearchAggregator), - AggregateAddDocuments(DocumentsAggregator), - AggregateDeleteDocuments(DocumentsDeletionAggregator), - 
AggregateUpdateDocuments(DocumentsAggregator), - AggregateEditDocumentsByFunction(EditDocumentsByFunctionAggregator), - AggregateGetFetchDocuments(DocumentsFetchAggregator), - AggregatePostFetchDocuments(DocumentsFetchAggregator), -} - pub struct SegmentAnalytics { pub instance_uid: InstanceUid, - sender: Sender, pub user: User, } @@ -1083,8 +1066,6 @@ impl Aggregate for SearchAggregator { #[derive(Default)] pub struct MultiSearchAggregator { - timestamp: Option, - // requests total_received: usize, total_succeeded: usize, @@ -1103,9 +1084,6 @@ pub struct MultiSearchAggregator { // federation use_federation: bool, - - // context - user_agents: HashSet, } impl MultiSearchAggregator { @@ -1113,10 +1091,6 @@ impl MultiSearchAggregator { federated_search: &FederatedSearch, request: &HttpRequest, ) -> Self { - let timestamp = Some(OffsetDateTime::now_utc()); - - let user_agents = extract_user_agents(request).into_iter().collect(); - let use_federation = federated_search.federation.is_some(); let distinct_indexes: HashSet<_> = federated_search @@ -1166,7 +1140,6 @@ impl MultiSearchAggregator { federated_search.queries.iter().any(|query| query.show_ranking_score_details); Self { - timestamp, total_received: 1, total_succeeded: 0, total_distinct_index_count: distinct_indexes.len(), @@ -1174,7 +1147,6 @@ impl MultiSearchAggregator { total_search_count: federated_search.queries.len(), show_ranking_score, show_ranking_score_details, - user_agents, use_federation, } } @@ -1182,15 +1154,20 @@ impl MultiSearchAggregator { pub fn succeed(&mut self) { self.total_succeeded = self.total_succeeded.saturating_add(1); } +} + +impl Aggregate for MultiSearchAggregator { + fn event_name(&self) -> &'static str { + "Documents Searched by Multi-Search POST" + } /// Aggregate one [MultiSearchAggregator] into another. 
- pub fn aggregate(&mut self, other: Self) { + fn aggregate(mut self, other: Self) -> Self { // write the aggregate in a way that will cause a compilation error if a field is added. // get ownership of self, replacing it by a default value. - let this = std::mem::take(self); + let this = self; - let timestamp = this.timestamp.or(other.timestamp); let total_received = this.total_received.saturating_add(other.total_received); let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded); let total_distinct_index_count = @@ -1207,75 +1184,53 @@ impl MultiSearchAggregator { user_agents.insert(user_agent); } - // need all fields or compile error - let mut aggregated = Self { - timestamp, + Self { total_received, total_succeeded, total_distinct_index_count, total_single_index, total_search_count, - user_agents, show_ranking_score, show_ranking_score_details, use_federation, - // do not add _ or ..Default::default() here - }; - - // replace the default self with the aggregated value - std::mem::swap(self, &mut aggregated); + } } - pub fn into_event(self, user: &User, event_name: &str) -> Option { + fn into_event(self) -> impl Serialize { let Self { - timestamp, total_received, total_succeeded, total_distinct_index_count, total_single_index, total_search_count, - user_agents, show_ranking_score, show_ranking_score_details, use_federation, } = self; - if total_received == 0 { - None - } else { - let properties = json!({ - "user-agent": user_agents, - "requests": { - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "indexes": { - "total_single_index": total_single_index, - "total_distinct_index_count": total_distinct_index_count, - "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early - }, - "searches": { - "total_search_count": total_search_count, - 
"avg_search_count": (total_search_count as f64) / (total_received as f64), - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - }, - "federation": { - "use_federation": use_federation, - } - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } + json!({ + "requests": { + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + }, + "indexes": { + "total_single_index": total_single_index, + "total_distinct_index_count": total_distinct_index_count, + "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early + }, + "searches": { + "total_search_count": total_search_count, + "avg_search_count": (total_search_count as f64) / (total_received as f64), + }, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + }, + "federation": { + "use_federation": use_federation, + } + }) } } @@ -1752,13 +1707,13 @@ impl DocumentsFetchAggregator { } } +aggregate_methods!( + SimilarPOST => "Similar POST", + SimilarGET => "Similar GET", +); + #[derive(Default)] -pub struct SimilarAggregator { - timestamp: Option, - - // context - user_agents: HashSet, - +pub struct SimilarAggregator { // requests total_received: usize, total_succeeded: usize, @@ -1787,9 +1742,11 @@ pub struct SimilarAggregator { show_ranking_score: bool, show_ranking_score_details: bool, ranking_score_threshold: bool, + + marker: std::marker::PhantomData, } -impl SimilarAggregator { +impl SimilarAggregator { #[allow(clippy::field_reassign_with_default)] pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { let SimilarQuery { @@ -1854,12 +1811,16 @@ impl SimilarAggregator { 
self.time_spent.push(*processing_time_ms as usize); } +} + +impl Aggregate for SimilarAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } /// Aggregate one [SimilarAggregator] into another. - pub fn aggregate(&mut self, mut other: Self) { + fn aggregate(mut self, mut other: Self) -> Self { let Self { - timestamp, - user_agents, total_received, total_succeeded, ref mut time_spent, @@ -1875,17 +1836,9 @@ impl SimilarAggregator { show_ranking_score_details, ranking_score_threshold, retrieve_vectors, + marker: _, } = other; - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - // request self.total_received = self.total_received.saturating_add(total_received); self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); @@ -1917,12 +1870,12 @@ impl SimilarAggregator { self.show_ranking_score |= show_ranking_score; self.show_ranking_score_details |= show_ranking_score_details; self.ranking_score_threshold |= ranking_score_threshold; + + self } - pub fn into_event(self, user: &User, event_name: &str) -> Option { + fn into_event(self) -> impl Serialize { let Self { - timestamp, - user_agents, total_received, total_succeeded, time_spent, @@ -1938,56 +1891,44 @@ impl SimilarAggregator { show_ranking_score_details, ranking_score_threshold, retrieve_vectors, + marker: _, } = self; - if total_received == 0 { - None - } else { - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentage of value + let percentile_99th = 
time_spent.len() * 99 / 100; + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "vector": { - "retrieve_vectors": retrieve_vectors, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - }, - "formatting": { - "max_attributes_to_retrieve": max_attributes_to_retrieve, - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + }, + "filter": { + "with_geoRadius": filter_with_geo_radius, + "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + 
"vector": { + "retrieve_vectors": retrieve_vectors, + }, + "pagination": { + "max_limit": max_limit, + "max_offset": max_offset, + }, + "formatting": { + "max_attributes_to_retrieve": max_attributes_to_retrieve, + }, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + "ranking_score_threshold": ranking_score_threshold, + } + }) } } diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 0fdeef5ed..c78dc4dad 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -26,7 +26,7 @@ pub async fn create_dump( opt: web::Data, analytics: web::Data, ) -> Result { - analytics.publish(DumpAnalytics::default(), Some(&req)); + analytics.publish(DumpAnalytics::default(), &req); let task = KindWithContent::DumpCreation { keys: auth_controller.list_keys()?, diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 24c89938d..4ee5b37b0 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -35,7 +35,7 @@ async fn get_features( ) -> HttpResponse { let features = index_scheduler.features(); - analytics.publish(GetExperimentalFeatureAnalytics::default(), Some(&req)); + analytics.publish(GetExperimentalFeatureAnalytics::default(), &req); let features = features.runtime_features(); debug!(returns = ?features, "Get features"); HttpResponse::Ok().json(features) @@ -83,8 +83,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { } } - fn into_event(self) -> serde_json::Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } @@ -131,7 +131,7 @@ async fn patch_features( edit_documents_by_function, contains_filter, }, - Some(&req), + &req, ); index_scheduler.put_runtime_features(new_features)?; debug!(returns = ?new_features, "Patch features"); diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 
8f4cd026d..6dece61e6 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -194,7 +194,7 @@ pub async fn get_document( retrieve_vectors: param_retrieve_vectors.0, ..Default::default() }, - Some(&req), + &req, ); let index = index_scheduler.index(&index_uid)?; @@ -253,7 +253,7 @@ pub async fn delete_document( per_document_id: true, ..Default::default() }, - Some(&req), + &req, ); let task = KindWithContent::DocumentDeletion { @@ -319,7 +319,7 @@ pub async fn documents_by_query_post( max_offset: body.offset, ..Default::default() }, - Some(&req), + &req, ); documents_by_query(&index_scheduler, index_uid, body) @@ -361,7 +361,7 @@ pub async fn get_documents( max_offset: query.offset, ..Default::default() }, - Some(&req), + &req, ); documents_by_query(&index_scheduler, index_uid, query) @@ -486,7 +486,7 @@ pub async fn replace_documents( index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), method: PhantomData, }, - Some(&req), + &req, ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); @@ -543,7 +543,7 @@ pub async fn update_documents( index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), method: PhantomData, }, - Some(&req), + &req, ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); @@ -718,7 +718,7 @@ pub async fn delete_documents_batch( analytics.publish( DocumentsDeletionAggregator { total_received: 1, per_batch: true, ..Default::default() }, - Some(&req), + &req, ); let ids = body @@ -761,7 +761,7 @@ pub async fn delete_documents_by_filter( analytics.publish( DocumentsDeletionAggregator { total_received: 1, per_filter: true, ..Default::default() }, - Some(&req), + &req, ); // we ensure the filter is well formed before enqueuing it @@ -847,7 +847,7 @@ pub async fn edit_documents_by_function( with_context: params.context.is_some(), index_creation: 
index_scheduler.index(&index_uid).is_err(), }, - Some(&req), + &req, ); let DocumentEditionByFunction { filter, context, function } = params; @@ -902,7 +902,7 @@ pub async fn clear_all_documents( let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.publish( DocumentsDeletionAggregator { total_received: 1, clear_all: true, ..Default::default() }, - Some(&req), + &req, ); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 1e9d0e15e..f3c74a388 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -200,7 +200,7 @@ pub async fn search( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.publish(aggregate, Some(&req)); + analytics.publish(aggregate, &req); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 483a48a16..f926f663c 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -160,7 +160,7 @@ pub async fn create_index( if allow_index_creation { analytics.publish( IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() }, - Some(&req), + &req, ); let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; @@ -247,7 +247,7 @@ pub async fn update_index( let body = body.into_inner(); analytics.publish( IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() }, - Some(&req), + &req, ); let task = KindWithContent::IndexUpdate { diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index f833a57d2..538c46fd0 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -255,7 +255,7 @@ pub async fn search_with_url_query( if let Ok(ref 
search_result) = search_result { aggregate.succeed(search_result); } - analytics.publish(aggregate, Some(&req)); + analytics.publish(aggregate, &req); let search_result = search_result?; @@ -303,7 +303,7 @@ pub async fn search_with_post( MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); } } - analytics.publish(aggregate, Some(&req)); + analytics.publish(aggregate, &req); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index db83cb39b..bb2f6792d 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -8,6 +8,7 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::locales::Locale; use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{ settings, ProximityPrecisionView, RankingRuleView, SecretPolicy, Settings, Unchecked, @@ -94,7 +95,7 @@ macro_rules! 
make_setting_route { #[allow(clippy::redundant_closure_call)] analytics.publish( $crate::routes::indexes::settings::$analytics::new(body.as_ref()).to_settings(), - Some(&req), + &req, ); let new_settings = Settings { @@ -491,11 +492,11 @@ impl Aggregate for SettingsAnalytics { has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), }, distinct_attribute: DistinctAttributeAnalytics { - set: self.distinct_attribute.set.or(other.distinct_attribute.set), + set: self.distinct_attribute.set | other.distinct_attribute.set, }, proximity_precision: ProximityPrecisionAnalytics { - set: self.proximity_precision.set(other.proximity_precision.set), - value: self.proximity_precision.value(other.proximity_precision.value), + set: self.proximity_precision.set | other.proximity_precision.set, + value: self.proximity_precision.value.or(other.proximity_precision.value), }, typo_tolerance: TypoToleranceAnalytics { enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), @@ -542,7 +543,7 @@ impl Aggregate for SettingsAnalytics { sources: match (self.embedders.sources, other.embedders.sources) { (None, None) => None, (Some(sources), None) | (None, Some(sources)) => Some(sources), - (Some(this), Some(other)) => Some(this.union(&other).collect()), + (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), }, document_template_used: match ( self.embedders.document_template_used, @@ -598,45 +599,70 @@ impl Aggregate for SettingsAnalytics { #[derive(Serialize, Default)] struct RankingRulesAnalytics { - words_position: Option, - typo_position: Option, - proximity_position: Option, - attribute_position: Option, - sort_position: Option, - exactness_position: Option, - values: Option, + words_position: Option, + typo_position: Option, + proximity_position: Option, + attribute_position: Option, + sort_position: Option, + exactness_position: Option, + values: Option, } impl RankingRulesAnalytics { pub fn new(rr: Option<&Vec>) -> Self { 
RankingRulesAnalytics { - words_position: rr.as_ref().map(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) - }), - typo_position: rr.as_ref().map(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) - }), - proximity_position: rr.as_ref().map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) + words_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Words) + }) }) - }), - attribute_position: rr.as_ref().map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) + .flatten(), + + typo_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Typo) + }) }) - }), - sort_position: rr.as_ref().map(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) - }), - exactness_position: rr.as_ref().map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) + .flatten(), + + proximity_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) + }) }) - }), + .flatten(), + + attribute_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) + }) + }) + .flatten(), + sort_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Sort) + }) + }) + .flatten(), + exactness_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) + }) + }) + .flatten(), + values: rr.as_ref().map(|rr| { rr.iter() .filter(|s| { @@ -661,7 +687,7 @@ impl 
RankingRulesAnalytics { #[derive(Serialize, Default)] struct SearchableAttributesAnalytics { total: Option, - with_wildcard: bool, + with_wildcard: Option, } impl SearchableAttributesAnalytics { @@ -681,8 +707,8 @@ impl SearchableAttributesAnalytics { #[derive(Serialize, Default)] struct DisplayedAttributesAnalytics { - total: usize, - with_wildcard: bool, + total: Option, + with_wildcard: Option, } impl DisplayedAttributesAnalytics { @@ -702,8 +728,8 @@ impl DisplayedAttributesAnalytics { #[derive(Serialize, Default)] struct SortableAttributesAnalytics { - total: usize, - has_geo: bool, + total: Option, + has_geo: Option, } impl SortableAttributesAnalytics { @@ -721,15 +747,15 @@ impl SortableAttributesAnalytics { #[derive(Serialize, Default)] struct FilterableAttributesAnalytics { - total: usize, - has_geo: bool, + total: Option, + has_geo: Option, } impl FilterableAttributesAnalytics { pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { Self { - total: setting.as_ref().map(|filter| filter.len()).unwrap_or(0), - has_geo: setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), + total: setting.as_ref().map(|filter| filter.len()), + has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), } } @@ -761,7 +787,7 @@ struct ProximityPrecisionAnalytics { impl ProximityPrecisionAnalytics { pub fn new(precision: Option<&meilisearch_types::settings::ProximityPrecisionView>) -> Self { - Self { set: precision.is_some(), value: precision.unwrap_or_default() } + Self { set: precision.is_some(), value: precision.cloned() } } pub fn to_settings(self) -> SettingsAnalytics { @@ -774,8 +800,8 @@ struct TypoToleranceAnalytics { enabled: Option, disable_on_attributes: Option, disable_on_words: Option, - min_word_size_for_one_typo: Option, - min_word_size_for_two_typos: Option, + min_word_size_for_one_typo: Option, + min_word_size_for_two_typos: Option, } impl TypoToleranceAnalytics { @@ -805,9 +831,9 @@ impl TypoToleranceAnalytics { 
#[derive(Serialize, Default)] struct FacetingAnalytics { - max_values_per_facet: Option, + max_values_per_facet: Option, sort_facet_values_by_star_count: Option, - sort_facet_values_by_total: Option, + sort_facet_values_by_total: Option, } impl FacetingAnalytics { @@ -833,7 +859,7 @@ impl FacetingAnalytics { #[derive(Serialize, Default)] struct PaginationAnalytics { - max_total_hits: Option, + max_total_hits: Option, } impl PaginationAnalytics { @@ -909,18 +935,18 @@ impl EmbeddersAnalytics { { use meilisearch_types::milli::vector::settings::EmbedderSource; match source { - EmbedderSource::OpenAi => sources.insert("openAi"), - EmbedderSource::HuggingFace => sources.insert("huggingFace"), - EmbedderSource::UserProvided => sources.insert("userProvided"), - EmbedderSource::Ollama => sources.insert("ollama"), - EmbedderSource::Rest => sources.insert("rest"), + EmbedderSource::OpenAi => sources.insert("openAi".to_string()), + EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()), + EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), + EmbedderSource::Ollama => sources.insert("ollama".to_string()), + EmbedderSource::Rest => sources.insert("rest".to_string()), }; } }; Self { total: setting.as_ref().map(|s| s.len()), - sources, + sources: Some(sources), document_template_used: setting.as_ref().map(|map| { map.values() .filter_map(|config| config.clone().set()) @@ -953,7 +979,7 @@ struct SearchCutoffMsAnalytics { impl SearchCutoffMsAnalytics { pub fn new(setting: Option<&u64>) -> Self { - Self { search_cutoff_ms: setting } + Self { search_cutoff_ms: setting.copied() } } pub fn to_settings(self) -> SettingsAnalytics { @@ -964,7 +990,7 @@ impl SearchCutoffMsAnalytics { #[derive(Serialize, Default)] #[serde(transparent)] struct LocalesAnalytics { - locales: BTreeSet, + locales: Option>, } impl LocalesAnalytics { @@ -988,7 +1014,7 @@ impl LocalesAnalytics { #[derive(Serialize, Default)] struct DictionaryAnalytics { - total: usize, + 
total: Option, } impl DictionaryAnalytics { @@ -1003,7 +1029,7 @@ impl DictionaryAnalytics { #[derive(Serialize, Default)] struct SeparatorTokensAnalytics { - total: usize, + total: Option, } impl SeparatorTokensAnalytics { @@ -1018,7 +1044,7 @@ impl SeparatorTokensAnalytics { #[derive(Serialize, Default)] struct NonSeparatorTokensAnalytics { - total: usize, + total: Option, } impl NonSeparatorTokensAnalytics { @@ -1088,7 +1114,7 @@ pub async fn update_all( new_settings.non_separator_tokens.as_ref().set(), ), }, - Some(&req), + &req, ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index f94a02987..91c435254 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -13,6 +13,7 @@ use serde_json::Value; use tracing::debug; use super::ActionPolicy; +use crate::analytics::segment_analytics::{SimilarGET, SimilarPOST}; use crate::analytics::{Analytics, SimilarAggregator}; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; @@ -34,13 +35,13 @@ pub async fn similar_get( index_uid: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.0.try_into()?; - let mut aggregate = SimilarAggregator::from_query(&query, &req); + let mut aggregate = SimilarAggregator::::from_query(&query, &req); debug!(parameters = ?query, "Similar get"); @@ -49,7 +50,7 @@ pub async fn similar_get( if let Ok(similar) = &similar { aggregate.succeed(similar); } - analytics.get_similar(aggregate); + analytics.publish(aggregate, &req); let similar = similar?; @@ -62,21 +63,21 @@ pub async fn similar_post( index_uid: web::Path, params: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { 
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.into_inner(); debug!(parameters = ?query, "Similar post"); - let mut aggregate = SimilarAggregator::from_query(&query, &req); + let mut aggregate = SimilarAggregator::::from_query(&query, &req); let similar = similar(index_scheduler, index_uid, query).await; if let Ok(similar) = &similar { aggregate.succeed(similar); } - analytics.post_similar(aggregate); + analytics.publish(aggregate, &req); let similar = similar?; diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 5fcb868c6..994c256d2 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -35,7 +35,7 @@ pub async fn multi_search_with_post( search_queue: Data, params: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { // Since we don't want to process half of the search requests and then get a permit refused // we're going to get one permit for the whole duration of the multi-search request. @@ -87,7 +87,7 @@ pub async fn multi_search_with_post( multi_aggregate.succeed(); } - analytics.post_multi_search(multi_aggregate); + analytics.publish(multi_aggregate, &req); HttpResponse::Ok().json(search_result??) } None => { @@ -149,7 +149,7 @@ pub async fn multi_search_with_post( if search_results.is_ok() { multi_aggregate.succeed(); } - analytics.post_multi_search(multi_aggregate); + analytics.publish(multi_aggregate, &req); let search_results = search_results.map_err(|(mut err, query_index)| { // Add the query index that failed as context for the error message. 
diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 84673729f..cacbc41af 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -3,7 +3,6 @@ use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; -use serde_json::json; use tracing::debug; use crate::analytics::Analytics; @@ -17,13 +16,15 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot)))); } +crate::empty_analytics!(SnapshotAnalytics, "Snapshot Created"); + pub async fn create_snapshot( index_scheduler: GuardedData, Data>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { - analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req)); + analytics.publish(SnapshotAnalytics::default(), &req); let task = KindWithContent::SnapshotCreation; let uid = get_task_id(&req, &opt)?; diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 34e904230..42ebd7858 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -8,10 +8,11 @@ use meilisearch_types::error::deserr_codes::InvalidSwapIndexes; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; +use serde::Serialize; use serde_json::json; use super::{get_task_id, is_dry_run, SummarizedTaskView}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; @@ -29,21 +30,34 @@ pub struct SwapIndexesPayload { indexes: Vec, } +#[derive(Serialize)] +struct IndexSwappedAnalytics { + 
swap_operation_number: usize, +} + +impl Aggregate for IndexSwappedAnalytics { + fn event_name(&self) -> &'static str { + "Indexes Swapped" + } + + fn aggregate(self, other: Self) -> Self { + Self { swap_operation_number: self.swap_operation_number.max(other.swap_operation_number) } + } + + fn into_event(self) -> impl Serialize { + self + } +} + pub async fn swap_indexes( index_scheduler: GuardedData, Data>, params: AwebJson, DeserrJsonError>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let params = params.into_inner(); - analytics.publish( - "Indexes Swapped".to_string(), - json!({ - "swap_operation_number": params.len(), // Return the max ever encountered - }), - Some(&req), - ); + analytics.publish(IndexSwappedAnalytics { swap_operation_number: params.len() }, &req); let filters = index_scheduler.filters(); let mut swaps = vec![]; diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 3dc6520af..162d19ca1 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -12,18 +12,17 @@ use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList}; use meilisearch_types::task_view::TaskView; use meilisearch_types::tasks::{Kind, KindWithContent, Status}; use serde::Serialize; -use serde_json::json; use time::format_description::well_known::Rfc3339; use time::macros::format_description; use time::{Date, Duration, OffsetDateTime, Time}; use tokio::task; use super::{get_task_id, is_dry_run, SummarizedTaskView}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, AggregateMethod, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::Opt; +use crate::{aggregate_methods, Opt}; const DEFAULT_LIMIT: u32 = 20; @@ -158,12 +157,69 @@ impl TaskDeletionOrCancelationQuery { } } +aggregate_methods!( + CancelTasks => "Tasks 
Canceled", + DeleteTasks => "Tasks Deleted", +); + +#[derive(Serialize)] +struct TaskFilterAnalytics { + filtered_by_uid: bool, + filtered_by_index_uid: bool, + filtered_by_type: bool, + filtered_by_status: bool, + filtered_by_canceled_by: bool, + filtered_by_before_enqueued_at: bool, + filtered_by_after_enqueued_at: bool, + filtered_by_before_started_at: bool, + filtered_by_after_started_at: bool, + filtered_by_before_finished_at: bool, + filtered_by_after_finished_at: bool, + + #[serde(skip)] + marker: std::marker::PhantomData, +} + +impl Aggregate for TaskFilterAnalytics { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(self, other: Self) -> Self { + Self { + filtered_by_uid: self.filtered_by_uid | other.filtered_by_uid, + filtered_by_index_uid: self.filtered_by_index_uid | other.filtered_by_index_uid, + filtered_by_type: self.filtered_by_type | other.filtered_by_type, + filtered_by_status: self.filtered_by_status | other.filtered_by_status, + filtered_by_canceled_by: self.filtered_by_canceled_by | other.filtered_by_canceled_by, + filtered_by_before_enqueued_at: self.filtered_by_before_enqueued_at + | other.filtered_by_before_enqueued_at, + filtered_by_after_enqueued_at: self.filtered_by_after_enqueued_at + | other.filtered_by_after_enqueued_at, + filtered_by_before_started_at: self.filtered_by_before_started_at + | other.filtered_by_before_started_at, + filtered_by_after_started_at: self.filtered_by_after_started_at + | other.filtered_by_after_started_at, + filtered_by_before_finished_at: self.filtered_by_before_finished_at + | other.filtered_by_before_finished_at, + filtered_by_after_finished_at: self.filtered_by_after_finished_at + | other.filtered_by_after_finished_at, + + marker: std::marker::PhantomData, + } + } + + fn into_event(self) -> impl Serialize { + self + } +} + async fn cancel_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, req: HttpRequest, opt: web::Data, - analytics: web::Data, + 
analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -172,21 +228,22 @@ async fn cancel_tasks( } analytics.publish( - "Tasks Canceled".to_string(), - json!({ - "filtered_by_uid": params.uids.is_some(), - "filtered_by_index_uid": params.index_uids.is_some(), - "filtered_by_type": params.types.is_some(), - "filtered_by_status": params.statuses.is_some(), - "filtered_by_canceled_by": params.canceled_by.is_some(), - "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(), - "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(), - "filtered_by_before_started_at": params.before_started_at.is_some(), - "filtered_by_after_started_at": params.after_started_at.is_some(), - "filtered_by_before_finished_at": params.before_finished_at.is_some(), - "filtered_by_after_finished_at": params.after_finished_at.is_some(), - }), - Some(&req), + TaskFilterAnalytics:: { + filtered_by_uid: params.uids.is_some(), + filtered_by_index_uid: params.index_uids.is_some(), + filtered_by_type: params.types.is_some(), + filtered_by_status: params.statuses.is_some(), + filtered_by_canceled_by: params.canceled_by.is_some(), + filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(), + filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(), + filtered_by_before_started_at: params.before_started_at.is_some(), + filtered_by_after_started_at: params.after_started_at.is_some(), + filtered_by_before_finished_at: params.before_finished_at.is_some(), + filtered_by_after_finished_at: params.after_finished_at.is_some(), + + marker: std::marker::PhantomData, + }, + &req, ); let query = params.into_query(); @@ -214,7 +271,7 @@ async fn delete_tasks( params: AwebQueryParameter, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -223,22 +280,24 @@ async fn delete_tasks( } analytics.publish( - "Tasks Deleted".to_string(), - json!({ - "filtered_by_uid": 
params.uids.is_some(), - "filtered_by_index_uid": params.index_uids.is_some(), - "filtered_by_type": params.types.is_some(), - "filtered_by_status": params.statuses.is_some(), - "filtered_by_canceled_by": params.canceled_by.is_some(), - "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(), - "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(), - "filtered_by_before_started_at": params.before_started_at.is_some(), - "filtered_by_after_started_at": params.after_started_at.is_some(), - "filtered_by_before_finished_at": params.before_finished_at.is_some(), - "filtered_by_after_finished_at": params.after_finished_at.is_some(), - }), - Some(&req), + TaskFilterAnalytics:: { + filtered_by_uid: params.uids.is_some(), + filtered_by_index_uid: params.index_uids.is_some(), + filtered_by_type: params.types.is_some(), + filtered_by_status: params.statuses.is_some(), + filtered_by_canceled_by: params.canceled_by.is_some(), + filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(), + filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(), + filtered_by_before_started_at: params.before_started_at.is_some(), + filtered_by_after_started_at: params.after_started_at.is_some(), + filtered_by_before_finished_at: params.before_finished_at.is_some(), + filtered_by_after_finished_at: params.after_finished_at.is_some(), + + marker: std::marker::PhantomData, + }, + &req, ); + let query = params.into_query(); let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes( From ea6883189ef73429b748473d436b71ea4a7a5a52 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 16 Oct 2024 21:17:06 +0200 Subject: [PATCH 54/92] finish the analytics in all the routes --- meilisearch/src/analytics/mod.rs | 33 ++-- .../src/analytics/segment_analytics.rs | 153 +++--------------- meilisearch/src/routes/features.rs | 1 - meilisearch/src/routes/indexes/documents.rs | 58 +++---- .../src/routes/indexes/facet_search.rs | 24 +-- 
meilisearch/src/routes/indexes/mod.rs | 5 +- meilisearch/src/routes/indexes/similar.rs | 4 +- meilisearch/src/routes/multi_search.rs | 2 +- meilisearch/src/routes/swap_indexes.rs | 1 - meilisearch/src/routes/tasks.rs | 2 +- 10 files changed, 84 insertions(+), 199 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index a0ca47d8f..ab6fd9993 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -1,7 +1,5 @@ pub mod segment_analytics; -use std::any::TypeId; -use std::collections::HashMap; use std::fs; use std::path::{Path, PathBuf}; use std::str::FromStr; @@ -10,7 +8,6 @@ use actix_web::HttpRequest; use meilisearch_types::InstanceUid; use once_cell::sync::Lazy; use platform_dirs::AppDirs; -use segment::message::User; use serde::Serialize; // if the feature analytics is enabled we use the real analytics @@ -83,7 +80,7 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } -pub trait Aggregate { +pub trait Aggregate: 'static { fn event_name(&self) -> &'static str; fn aggregate(self, other: Self) -> Self @@ -97,7 +94,7 @@ pub trait Aggregate { /// Helper trait to define multiple aggregate with the same content but a different name. /// Commonly used when you must aggregate a search with POST or with GET for example. -pub trait AggregateMethod { +pub trait AggregateMethod: 'static + Default { fn event_name() -> &'static str; } @@ -105,7 +102,8 @@ pub trait AggregateMethod { #[macro_export] macro_rules! aggregate_methods { ($method:ident => $event_name:literal) => { - pub enum $method {} + #[derive(Default)] + pub struct $method {} impl $crate::analytics::AggregateMethod for $method { fn event_name() -> &'static str { @@ -122,35 +120,26 @@ macro_rules! 
aggregate_methods { } pub struct Analytics { - // TODO: TAMO: remove - inner: Option, - - instance_uid: Option, - user: Option, - events: HashMap>, + segment: Option, } impl Analytics { fn no_analytics() -> Self { - Self { inner: None, events: HashMap::new(), instance_uid: None, user: None } + Self { segment: None } } fn segment_analytics(segment: SegmentAnalytics) -> Self { - Self { - instance_uid: Some(segment.instance_uid), - user: Some(segment.user), - inner: Some(segment), - events: HashMap::new(), - } + Self { segment: Some(segment) } } pub fn instance_uid(&self) -> Option<&InstanceUid> { - self.instance_uid + self.segment.as_ref().map(|segment| segment.instance_uid.as_ref()) } /// The method used to publish most analytics that do not need to be batched every hours - pub fn publish(&self, send: impl Aggregate, request: &HttpRequest) { - let Some(segment) = self.inner else { return }; + pub fn publish(&self, event: impl Aggregate, request: &HttpRequest) { + let Some(ref segment) = self.segment else { return }; let user_agents = extract_user_agents(request); + let _ = segment.sender.try_send(Box::new(event)); } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 0572267e1..601fefa1e 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,3 +1,4 @@ +use std::any::{Any, TypeId}; use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; use std::fs; use std::mem::take; @@ -74,6 +75,7 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec { pub struct SegmentAnalytics { pub instance_uid: InstanceUid, pub user: User, + pub sender: Sender>, } impl SegmentAnalytics { @@ -128,18 +130,7 @@ impl SegmentAnalytics { user: user.clone(), opt: opt.clone(), batcher, - post_search_aggregator: SearchAggregator::default(), - post_multi_search_aggregator: MultiSearchAggregator::default(), - post_facet_search_aggregator: 
FacetSearchAggregator::default(), - get_search_aggregator: SearchAggregator::default(), - add_documents_aggregator: DocumentsAggregator::default(), - delete_documents_aggregator: DocumentsDeletionAggregator::default(), - update_documents_aggregator: DocumentsAggregator::default(), - edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator::default(), - get_fetch_documents_aggregator: DocumentsFetchAggregator::default(), - post_fetch_documents_aggregator: DocumentsFetchAggregator::default(), - get_similar_aggregator: SimilarAggregator::default(), - post_similar_aggregator: SimilarAggregator::default(), + events: todo!(), }); tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone())); @@ -387,22 +378,11 @@ impl From for Infos { } pub struct Segment { - inbox: Receiver, + inbox: Receiver>, user: User, opt: Opt, batcher: AutoBatcher, - get_search_aggregator: SearchAggregator, - post_search_aggregator: SearchAggregator, - post_multi_search_aggregator: MultiSearchAggregator, - post_facet_search_aggregator: FacetSearchAggregator, - add_documents_aggregator: DocumentsAggregator, - delete_documents_aggregator: DocumentsDeletionAggregator, - update_documents_aggregator: DocumentsAggregator, - edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator, - get_fetch_documents_aggregator: DocumentsFetchAggregator, - post_fetch_documents_aggregator: DocumentsFetchAggregator, - get_similar_aggregator: SimilarAggregator, - post_similar_aggregator: SimilarAggregator, + events: HashMap>, } impl Segment { @@ -455,19 +435,8 @@ impl Segment { }, msg = self.inbox.recv() => { match msg { - Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await), - Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => 
self.post_multi_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateEditDocumentsByFunction(agreg)) => self.edit_documents_by_function_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostSimilar(agreg)) => self.post_similar_aggregator.aggregate(agreg), + // Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await), + Some(_) => todo!(), None => (), } } @@ -507,87 +476,19 @@ impl Segment { .await; } - let Segment { - inbox: _, - opt: _, - batcher: _, - user, - get_search_aggregator, - post_search_aggregator, - post_multi_search_aggregator, - post_facet_search_aggregator, - add_documents_aggregator, - delete_documents_aggregator, - update_documents_aggregator, - edit_documents_by_function_aggregator, - get_fetch_documents_aggregator, - post_fetch_documents_aggregator, - get_similar_aggregator, - post_similar_aggregator, - } = self; + // We empty the list of events + let events = std::mem::take(&mut self.events); - if let Some(get_search) = - take(get_search_aggregator).into_event(user, "Documents Searched GET") - { - let _ = self.batcher.push(get_search).await; - } - if let Some(post_search) = - take(post_search_aggregator).into_event(user, "Documents Searched POST") - { - let _ = 
self.batcher.push(post_search).await; - } - if let Some(post_multi_search) = take(post_multi_search_aggregator) - .into_event(user, "Documents Searched by Multi-Search POST") - { - let _ = self.batcher.push(post_multi_search).await; - } - if let Some(post_facet_search) = - take(post_facet_search_aggregator).into_event(user, "Facet Searched POST") - { - let _ = self.batcher.push(post_facet_search).await; - } - if let Some(add_documents) = - take(add_documents_aggregator).into_event(user, "Documents Added") - { - let _ = self.batcher.push(add_documents).await; - } - if let Some(delete_documents) = - take(delete_documents_aggregator).into_event(user, "Documents Deleted") - { - let _ = self.batcher.push(delete_documents).await; - } - if let Some(update_documents) = - take(update_documents_aggregator).into_event(user, "Documents Updated") - { - let _ = self.batcher.push(update_documents).await; - } - if let Some(edit_documents_by_function) = take(edit_documents_by_function_aggregator) - .into_event(user, "Documents Edited By Function") - { - let _ = self.batcher.push(edit_documents_by_function).await; - } - if let Some(get_fetch_documents) = - take(get_fetch_documents_aggregator).into_event(user, "Documents Fetched GET") - { - let _ = self.batcher.push(get_fetch_documents).await; - } - if let Some(post_fetch_documents) = - take(post_fetch_documents_aggregator).into_event(user, "Documents Fetched POST") - { - let _ = self.batcher.push(post_fetch_documents).await; + for (_, mut event) in events { + self.batcher.push(Track { + user: self.user, + event: event.event_name().to_string(), + properties: event.into_event(), + timestamp: todo!(), + ..Default::default() + }); } - if let Some(get_similar_documents) = - take(get_similar_aggregator).into_event(user, "Similar GET") - { - let _ = self.batcher.push(get_similar_documents).await; - } - - if let Some(post_similar_documents) = - take(post_similar_aggregator).into_event(user, "Similar POST") - { - let _ = 
self.batcher.push(post_similar_documents).await; - } let _ = self.batcher.flush().await; } } @@ -702,10 +603,8 @@ impl SearchAggregator { } = query; let mut ret = Self::default(); - ret.timestamp = Some(OffsetDateTime::now_utc()); ret.total_received = 1; - ret.user_agents = extract_user_agents(request).into_iter().collect(); if let Some(ref sort) = sort { ret.sort_total_number_of_criteria = 1; @@ -949,7 +848,7 @@ impl Aggregate for SearchAggregator { self } - fn into_event(self) -> Option { + fn into_event(self) -> impl Serialize { let Self { total_received, total_succeeded, @@ -1087,10 +986,7 @@ pub struct MultiSearchAggregator { } impl MultiSearchAggregator { - pub fn from_federated_search( - federated_search: &FederatedSearch, - request: &HttpRequest, - ) -> Self { + pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { let use_federation = federated_search.federation.is_some(); let distinct_indexes: HashSet<_> = federated_search @@ -1162,7 +1058,7 @@ impl Aggregate for MultiSearchAggregator { } /// Aggregate one [MultiSearchAggregator] into another. - fn aggregate(mut self, other: Self) -> Self { + fn aggregate(self, other: Self) -> Self { // write the aggregate in a way that will cause a compilation error if a field is added. // get ownership of self, replacing it by a default value. 
@@ -1177,13 +1073,8 @@ impl Aggregate for MultiSearchAggregator { let show_ranking_score = this.show_ranking_score || other.show_ranking_score; let show_ranking_score_details = this.show_ranking_score_details || other.show_ranking_score_details; - let mut user_agents = this.user_agents; let use_federation = this.use_federation || other.use_federation; - for user_agent in other.user_agents.into_iter() { - user_agents.insert(user_agent); - } - Self { total_received, total_succeeded, @@ -1748,7 +1639,7 @@ pub struct SimilarAggregator { impl SimilarAggregator { #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { + pub fn from_query(query: &SimilarQuery) -> Self { let SimilarQuery { id: _, embedder: _, @@ -1763,10 +1654,8 @@ impl SimilarAggregator { } = query; let mut ret = Self::default(); - ret.timestamp = Some(OffsetDateTime::now_utc()); ret.total_received = 1; - ret.user_agents = extract_user_agents(request).into_iter().collect(); if let Some(ref filter) = filter { static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 4ee5b37b0..0b43c3f13 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -7,7 +7,6 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; -use serde_json::json; use tracing::debug; use crate::analytics::{Aggregate, Analytics}; diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 6dece61e6..1573b768b 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -32,7 +32,7 @@ use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; use tracing::debug; -use crate::analytics::{Aggregate, AggregateMethod, Analytics, 
DocumentDeletionKind}; +use crate::analytics::{Aggregate, AggregateMethod, Analytics}; use crate::error::MeilisearchHttpError; use crate::error::PayloadError::ReceivePayload; use crate::extractors::authentication::policies::*; @@ -102,8 +102,13 @@ pub struct GetDocument { retrieve_vectors: Param, } +aggregate_methods!( + DocumentsGET => "Documents Fetched GET", + DocumentsPOST => "Documents Fetched POST", +); + #[derive(Default, Serialize)] -pub struct DocumentsFetchAggregator { +pub struct DocumentsFetchAggregator { #[serde(rename = "requests.total_received")] total_received: usize, @@ -120,6 +125,8 @@ pub struct DocumentsFetchAggregator { max_limit: usize, #[serde(rename = "pagination.max_offset")] max_offset: usize, + + marker: std::marker::PhantomData, } #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -128,7 +135,7 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } -impl DocumentsFetchAggregator { +impl DocumentsFetchAggregator { pub fn from_query(query: &DocumentFetchKind) -> Self { let (limit, offset, retrieve_vectors) = match query { DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), @@ -136,6 +143,7 @@ impl DocumentsFetchAggregator { (*limit, *offset, *retrieve_vectors) } }; + Self { total_received: 1, per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. 
}), @@ -143,20 +151,18 @@ impl DocumentsFetchAggregator { max_limit: limit, max_offset: offset, retrieve_vectors, + + marker: PhantomData, } } } -impl Aggregate for DocumentsFetchAggregator { - // TODO: TAMO: Should we do the same event for the GET requests +impl Aggregate for DocumentsFetchAggregator { fn event_name(&self) -> &'static str { - "Documents Fetched POST" + Method::event_name() } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { + fn aggregate(self, other: Self) -> Self { Self { total_received: self.total_received.saturating_add(other.total_received), per_document_id: self.per_document_id | other.per_document_id, @@ -164,11 +170,12 @@ impl Aggregate for DocumentsFetchAggregator { retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors, max_limit: self.max_limit.max(other.max_limit), max_offset: self.max_offset.max(other.max_offset), + marker: PhantomData, } } - fn into_event(self) -> Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } @@ -190,7 +197,7 @@ pub async fn get_document( let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?; analytics.publish( - DocumentsFetchAggregator { + DocumentsFetchAggregator:: { retrieve_vectors: param_retrieve_vectors.0, ..Default::default() }, @@ -232,8 +239,8 @@ impl Aggregate for DocumentsDeletionAggregator { } } - fn into_event(self) -> Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } @@ -311,7 +318,7 @@ pub async fn documents_by_query_post( debug!(parameters = ?body, "Get documents POST"); analytics.publish( - DocumentsFetchAggregator { + DocumentsFetchAggregator:: { total_received: 1, per_filter: body.filter.is_some(), retrieve_vectors: body.retrieve_vectors, @@ -353,7 +360,7 @@ pub async fn get_documents( }; analytics.publish( - DocumentsFetchAggregator { + DocumentsFetchAggregator:: { total_received: 1, per_filter: query.filter.is_some(), retrieve_vectors: 
query.retrieve_vectors, @@ -436,20 +443,17 @@ impl Aggregate for DocumentsAggregator { Method::event_name() } - fn aggregate(mut self, other: Self) -> Self - where - Self: Sized, - { + fn aggregate(self, other: Self) -> Self { Self { - payload_types: self.payload_types.union(&other.payload_types).collect(), - primary_key: self.primary_key.union(&other.primary_key).collect(), + payload_types: self.payload_types.union(&other.payload_types).cloned().collect(), + primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), index_creation: self.index_creation | other.index_creation, method: PhantomData, } } - fn into_event(self) -> Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } @@ -818,8 +822,8 @@ impl Aggregate for EditDocumentsByFunctionAggregator { } } - fn into_event(self) -> Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index f3c74a388..08618970d 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -9,6 +9,7 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::locales::Locale; +use serde::Serialize; use serde_json::Value; use tracing::debug; @@ -72,7 +73,7 @@ pub struct FacetSearchAggregator { impl FacetSearchAggregator { #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { + pub fn from_query(query: &FacetSearchQuery) -> Self { let FacetSearchQuery { facet_query: _, facet_name, @@ -113,23 +114,22 @@ impl Aggregate for FacetSearchAggregator { "Facet Searched POST" } - fn aggregate(mut self, other: Self) -> Self - where - Self: Sized, - { - self.time_spent.insert(other.time_spent); + fn aggregate(mut 
self, other: Self) -> Self { + for time in other.time_spent { + self.time_spent.push(time); + } Self { total_received: self.total_received.saturating_add(other.total_received), total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded), time_spent: self.time_spent, - facet_names: self.facet_names.union(&other.facet_names).collect(), + facet_names: self.facet_names.union(&other.facet_names).cloned().collect(), additional_search_parameters_provided: self.additional_search_parameters_provided | other.additional_search_parameters_provided, } } - fn into_event(self) -> Value { + fn into_event(self) -> impl Serialize { let Self { total_received, total_succeeded, @@ -137,6 +137,12 @@ impl Aggregate for FacetSearchAggregator { facet_names, additional_search_parameters_provided, } = self; + // the index of the 99th percentage of value + let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.; + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th as usize); serde_json::json!({ "requests": { @@ -166,7 +172,7 @@ pub async fn search( let query = params.into_inner(); debug!(parameters = ?query, "Facet search"); - let mut aggregate = FacetSearchAggregator::from_query(&query, &req); + let mut aggregate = FacetSearchAggregator::from_query(&query); let facet_query = query.facet_query.clone(); let facet_name = query.facet_name.clone(); diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index f926f663c..3c41f36fe 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -14,7 +14,6 @@ use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::{self, FieldDistribution, Index}; use meilisearch_types::tasks::KindWithContent; use serde::Serialize; -use serde_json::json; use time::OffsetDateTime; use 
tracing::debug; @@ -138,7 +137,7 @@ impl Aggregate for IndexCreatedAggregate { where Self: Sized, { - Self { primary_key: self.primary_key.union(&other.primary_key).collect() } + Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() } } fn into_event(self) -> impl Serialize { @@ -227,7 +226,7 @@ impl Aggregate for IndexUpdatedAggregate { } fn aggregate(self, other: Self) -> Self { - Self { primary_key: self.primary_key.union(&other.primary_key).collect() } + Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() } } fn into_event(self) -> impl Serialize { diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index 91c435254..33df6bdad 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -41,7 +41,7 @@ pub async fn similar_get( let query = params.0.try_into()?; - let mut aggregate = SimilarAggregator::::from_query(&query, &req); + let mut aggregate = SimilarAggregator::::from_query(&query); debug!(parameters = ?query, "Similar get"); @@ -70,7 +70,7 @@ pub async fn similar_post( let query = params.into_inner(); debug!(parameters = ?query, "Similar post"); - let mut aggregate = SimilarAggregator::::from_query(&query, &req); + let mut aggregate = SimilarAggregator::::from_query(&query); let similar = similar(index_scheduler, index_uid, query).await; diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 994c256d2..13a39cb44 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -43,7 +43,7 @@ pub async fn multi_search_with_post( let federated_search = params.into_inner(); - let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req); + let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search); let FederatedSearch { mut queries, federation } = federated_search; diff 
--git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 42ebd7858..abdffbb73 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -9,7 +9,6 @@ use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; use serde::Serialize; -use serde_json::json; use super::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::analytics::{Aggregate, Analytics}; diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 162d19ca1..f04e2ead2 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -180,7 +180,7 @@ struct TaskFilterAnalytics { marker: std::marker::PhantomData, } -impl Aggregate for TaskFilterAnalytics { +impl Aggregate for TaskFilterAnalytics { fn event_name(&self) -> &'static str { Method::event_name() } From 6728cfbfac2a1b3e56b7bb7f13687dc610b48ca3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 00:38:18 +0200 Subject: [PATCH 55/92] fix the analytics --- Cargo.lock | 7 ++ meilisearch/Cargo.toml | 1 + meilisearch/src/analytics/mod.rs | 34 ++++++--- .../src/analytics/segment_analytics.rs | 76 ++++++++++++------- 4 files changed, 81 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c85a59952..733470384 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3415,6 +3415,7 @@ dependencies = [ "meilisearch-types", "mimalloc", "mime", + "mopa", "num_cpus", "obkv", "once_cell", @@ -3681,6 +3682,12 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "mopa" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a785740271256c230f57462d3b83e52f998433a7062fc18f96d5999474a9f915" + [[package]] name = "mutually_exclusive_features" version = "0.0.3" diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 6c2fb4060..322b333ac 100644 --- a/meilisearch/Cargo.toml +++ 
b/meilisearch/Cargo.toml @@ -104,6 +104,7 @@ tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-actix-web = "0.7.11" build-info = { version = "1.7.0", path = "../build-info" } roaring = "0.10.2" +mopa = "0.2.2" [dev-dependencies] actix-rt = "2.10.0" diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index ab6fd9993..8a0a68bad 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -6,9 +6,9 @@ use std::str::FromStr; use actix_web::HttpRequest; use meilisearch_types::InstanceUid; +use mopa::mopafy; use once_cell::sync::Lazy; use platform_dirs::AppDirs; -use serde::Serialize; // if the feature analytics is enabled we use the real analytics pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; @@ -31,11 +31,11 @@ macro_rules! empty_analytics { $event_name } - fn aggregate(self, _other: Self) -> Self { + fn aggregate(self: Box, _other: Box) -> Box { self } - fn into_event(self) -> impl serde::Serialize { + fn into_event(self: Box) -> serde_json::Value { serde_json::json!({}) } } @@ -80,18 +80,34 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } -pub trait Aggregate: 'static { +pub trait Aggregate: 'static + mopa::Any + Send { fn event_name(&self) -> &'static str; - fn aggregate(self, other: Self) -> Self + fn aggregate(self: Box, other: Box) -> Box where Self: Sized; - fn into_event(self) -> impl Serialize + fn downcast_aggregate( + this: Box, + other: Box, + ) -> Option> where - Self: Sized; + Self: Sized, + { + if this.is::() && other.is::() { + let this = this.downcast::().ok()?; + let other = other.downcast::().ok()?; + Some(Self::aggregate(this, other)) + } else { + None + } + } + + fn into_event(self: Box) -> serde_json::Value; } +mopafy!(Aggregate); + /// Helper trait to define multiple aggregate with the same content but a different name. 
/// Commonly used when you must aggregate a search with POST or with GET for example. pub trait AggregateMethod: 'static + Default { @@ -137,9 +153,9 @@ impl Analytics { } /// The method used to publish most analytics that do not need to be batched every hours - pub fn publish(&self, event: impl Aggregate, request: &HttpRequest) { + pub fn publish(&self, event: T, request: &HttpRequest) { let Some(ref segment) = self.segment else { return }; let user_agents = extract_user_agents(request); - let _ = segment.sender.try_send(Box::new(event)); + let _ = segment.sender.try_send(segment_analytics::Message::new(event)); } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 601fefa1e..1a1bb9226 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,7 +1,6 @@ -use std::any::{Any, TypeId}; +use std::any::TypeId; use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; use std::fs; -use std::mem::take; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -72,10 +71,26 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec { .collect() } +pub struct Message { + type_id: TypeId, + event: Box, + aggregator_function: fn(Box, Box) -> Option>, +} + +impl Message { + pub fn new(event: T) -> Self { + Self { + type_id: TypeId::of::(), + event: Box::new(event), + aggregator_function: T::downcast_aggregate, + } + } +} + pub struct SegmentAnalytics { pub instance_uid: InstanceUid, pub user: User, - pub sender: Sender>, + pub sender: Sender, } impl SegmentAnalytics { @@ -378,7 +393,7 @@ impl From for Infos { } pub struct Segment { - inbox: Receiver>, + inbox: Receiver, user: User, opt: Opt, batcher: AutoBatcher, @@ -435,8 +450,13 @@ impl Segment { }, msg = self.inbox.recv() => { match msg { - // Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await), - Some(_) => todo!(), + Some(Message { 
type_id, event, aggregator_function }) => { + let new_event = match self.events.remove(&type_id) { + Some(old) => (aggregator_function)(old, event).unwrap(), + None => event, + }; + self.events.insert(type_id, new_event); + }, None => (), } } @@ -479,9 +499,9 @@ impl Segment { // We empty the list of events let events = std::mem::take(&mut self.events); - for (_, mut event) in events { + for (_, event) in events { self.batcher.push(Track { - user: self.user, + user: self.user.clone(), event: event.event_name().to_string(), properties: event.into_event(), timestamp: todo!(), @@ -722,11 +742,11 @@ impl Aggregate for SearchAggregator { Method::event_name() } - fn aggregate(mut self, mut other: Self) -> Self { + fn aggregate(mut self: Box, other: Box) -> Box { let Self { total_received, total_succeeded, - ref mut time_spent, + mut time_spent, sort_with_geo_point, sort_sum_of_criteria_terms, sort_total_number_of_criteria, @@ -761,9 +781,9 @@ impl Aggregate for SearchAggregator { total_degraded, total_used_negative_operator, ranking_score_threshold, - ref mut locales, + mut locales, marker: _, - } = other; + } = *other; // request self.total_received = self.total_received.saturating_add(total_received); @@ -771,7 +791,7 @@ impl Aggregate for SearchAggregator { self.total_degraded = self.total_degraded.saturating_add(total_degraded); self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(total_used_negative_operator); - self.time_spent.append(time_spent); + self.time_spent.append(&mut time_spent); // sort self.sort_with_geo_point |= sort_with_geo_point; @@ -843,12 +863,12 @@ impl Aggregate for SearchAggregator { self.ranking_score_threshold |= ranking_score_threshold; // locales - self.locales.append(locales); + self.locales.append(&mut locales); self } - fn into_event(self) -> impl Serialize { + fn into_event(self: Box) -> serde_json::Value { let Self { total_received, total_succeeded, @@ -889,7 +909,7 @@ impl Aggregate for SearchAggregator 
{ ranking_score_threshold, locales, marker: _, - } = self; + } = *self; // we get all the values in a sorted manner let time_spent = time_spent.into_sorted_vec(); @@ -1058,11 +1078,11 @@ impl Aggregate for MultiSearchAggregator { } /// Aggregate one [MultiSearchAggregator] into another. - fn aggregate(self, other: Self) -> Self { + fn aggregate(self: Box, other: Box) -> Box { // write the aggregate in a way that will cause a compilation error if a field is added. // get ownership of self, replacing it by a default value. - let this = self; + let this = *self; let total_received = this.total_received.saturating_add(other.total_received); let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded); @@ -1075,7 +1095,7 @@ impl Aggregate for MultiSearchAggregator { this.show_ranking_score_details || other.show_ranking_score_details; let use_federation = this.use_federation || other.use_federation; - Self { + Box::new(Self { total_received, total_succeeded, total_distinct_index_count, @@ -1084,10 +1104,10 @@ impl Aggregate for MultiSearchAggregator { show_ranking_score, show_ranking_score_details, use_federation, - } + }) } - fn into_event(self) -> impl Serialize { + fn into_event(self: Box) -> serde_json::Value { let Self { total_received, total_succeeded, @@ -1097,7 +1117,7 @@ impl Aggregate for MultiSearchAggregator { show_ranking_score, show_ranking_score_details, use_federation, - } = self; + } = *self; json!({ "requests": { @@ -1708,11 +1728,11 @@ impl Aggregate for SimilarAggregator { } /// Aggregate one [SimilarAggregator] into another. 
- fn aggregate(mut self, mut other: Self) -> Self { + fn aggregate(mut self: Box, other: Box) -> Box { let Self { total_received, total_succeeded, - ref mut time_spent, + mut time_spent, filter_with_geo_radius, filter_with_geo_bounding_box, filter_sum_of_criteria_terms, @@ -1726,12 +1746,12 @@ impl Aggregate for SimilarAggregator { ranking_score_threshold, retrieve_vectors, marker: _, - } = other; + } = *other; // request self.total_received = self.total_received.saturating_add(total_received); self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); - self.time_spent.append(time_spent); + self.time_spent.append(&mut time_spent); // filter self.filter_with_geo_radius |= filter_with_geo_radius; @@ -1763,7 +1783,7 @@ impl Aggregate for SimilarAggregator { self } - fn into_event(self) -> impl Serialize { + fn into_event(self: Box) -> serde_json::Value { let Self { total_received, total_succeeded, @@ -1781,7 +1801,7 @@ impl Aggregate for SimilarAggregator { ranking_score_threshold, retrieve_vectors, marker: _, - } = self; + } = *self; // we get all the values in a sorted manner let time_spent = time_spent.into_sorted_vec(); From aa7a34ffe8b9572c44b4bd36c30f7cf3805a9ed7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 00:43:34 +0200 Subject: [PATCH 56/92] make the aggregate method send --- meilisearch/src/analytics/mod.rs | 2 +- meilisearch/src/analytics/segment_analytics.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 8a0a68bad..f8a589901 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -110,7 +110,7 @@ mopafy!(Aggregate); /// Helper trait to define multiple aggregate with the same content but a different name. /// Commonly used when you must aggregate a search with POST or with GET for example. 
-pub trait AggregateMethod: 'static + Default { +pub trait AggregateMethod: 'static + Default + Send { fn event_name() -> &'static str; } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 1a1bb9226..92f03e48e 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -72,9 +72,12 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec { } pub struct Message { + // Since the type_id is solved statically we cannot retrieve it from the Box. + // Thus we have to send it in the message directly. type_id: TypeId, - event: Box, + // Same for the aggregate function. aggregator_function: fn(Box, Box) -> Option>, + event: Box, } impl Message { From e4ace98004fff86e35fe8dd4a2cdccfa8b03ce9f Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 01:04:25 +0200 Subject: [PATCH 57/92] fix all the routes + move to a better version of mopa --- Cargo.lock | 8 ++-- meilisearch/Cargo.toml | 2 +- meilisearch/src/analytics/mod.rs | 2 + meilisearch/src/routes/features.rs | 13 ++---- meilisearch/src/routes/indexes/documents.rs | 46 ++++++++----------- .../src/routes/indexes/facet_search.rs | 10 ++-- meilisearch/src/routes/indexes/mod.rs | 23 +++++----- meilisearch/src/routes/indexes/settings.rs | 16 ++----- meilisearch/src/routes/swap_indexes.rs | 10 ++-- meilisearch/src/routes/tasks.rs | 10 ++-- 10 files changed, 65 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 733470384..500f28454 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3415,7 +3415,7 @@ dependencies = [ "meilisearch-types", "mimalloc", "mime", - "mopa", + "mopa-maintained", "num_cpus", "obkv", "once_cell", @@ -3683,10 +3683,10 @@ dependencies = [ ] [[package]] -name = "mopa" -version = "0.2.2" +name = "mopa-maintained" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a785740271256c230f57462d3b83e52f998433a7062fc18f96d5999474a9f915" +checksum = "79b7f3e22167862cc7c95b21a6f326c22e4bf40da59cbf000b368a310173ba11" [[package]] name = "mutually_exclusive_features" diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 322b333ac..07357e724 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -104,7 +104,7 @@ tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-actix-web = "0.7.11" build-info = { version = "1.7.0", path = "../build-info" } roaring = "0.10.2" -mopa = "0.2.2" +mopa-maintained = "0.2.3" [dev-dependencies] actix-rt = "2.10.0" diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index f8a589901..b3e8109a3 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -1,3 +1,5 @@ +#![allow(clippy::transmute_ptr_to_ref)] // mopa isn't updated with the latest version of clippy yet + pub mod segment_analytics; use std::fs; diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 0b43c3f13..1de00717d 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -69,21 +69,18 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { "Experimental features Updated" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { vector_store: other.vector_store, metrics: other.metrics, logs_route: other.logs_route, edit_documents_by_function: other.edit_documents_by_function, contains_filter: other.contains_filter, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 1573b768b..854fa5b69 100644 ---
b/meilisearch/src/routes/indexes/documents.rs @@ -162,8 +162,8 @@ impl Aggregate for DocumentsFetchAggregator { Method::event_name() } - fn aggregate(self, other: Self) -> Self { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { total_received: self.total_received.saturating_add(other.total_received), per_document_id: self.per_document_id | other.per_document_id, per_filter: self.per_filter | other.per_filter, @@ -171,11 +171,11 @@ impl Aggregate for DocumentsFetchAggregator { max_limit: self.max_limit.max(other.max_limit), max_offset: self.max_offset.max(other.max_offset), marker: PhantomData, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } @@ -226,21 +226,18 @@ impl Aggregate for DocumentsDeletionAggregator { "Documents Deleted" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { total_received: self.total_received.saturating_add(other.total_received), per_document_id: self.per_document_id | other.per_document_id, clear_all: self.clear_all | other.clear_all, per_batch: self.per_batch | other.per_batch, per_filter: self.per_filter | other.per_filter, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } @@ -443,17 +440,17 @@ impl Aggregate for DocumentsAggregator { Method::event_name() } - fn aggregate(self, other: Self) -> Self { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { payload_types: self.payload_types.union(&other.payload_types).cloned().collect(), primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), index_creation: self.index_creation | other.index_creation, method: PhantomData, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> 
serde_json::Value { + serde_json::to_value(self).unwrap_or_default() } } @@ -811,19 +808,16 @@ impl Aggregate for EditDocumentsByFunctionAggregator { "Documents Edited By Function" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { filtered: self.filtered | other.filtered, with_context: self.with_context | other.with_context, index_creation: self.index_creation | other.index_creation, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 08618970d..715eaaaa7 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -114,29 +114,29 @@ impl Aggregate for FacetSearchAggregator { "Facet Searched POST" } - fn aggregate(mut self, other: Self) -> Self { + fn aggregate(mut self: Box, other: Box) -> Box { for time in other.time_spent { self.time_spent.push(time); } - Self { + Box::new(Self { total_received: self.total_received.saturating_add(other.total_received), total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded), time_spent: self.time_spent, facet_names: self.facet_names.union(&other.facet_names).cloned().collect(), additional_search_parameters_provided: self.additional_search_parameters_provided | other.additional_search_parameters_provided, - } + }) } - fn into_event(self) -> impl Serialize { + fn into_event(self: Box) -> serde_json::Value { let Self { total_received, total_succeeded, time_spent, facet_names, additional_search_parameters_provided, - } = self; + } = *self; // the index of the 99th percentage of value let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) 
+ 1.; // we get all the values in a sorted manner diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 3c41f36fe..8972119d7 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -133,15 +133,14 @@ impl Aggregate for IndexCreatedAggregate { "Index Created" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() } + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { + primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } @@ -225,12 +224,14 @@ impl Aggregate for IndexUpdatedAggregate { "Index Updated" } - fn aggregate(self, other: Self) -> Self { - Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() } + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { + primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } pub async fn update_index( diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index bb2f6792d..f31f52dc1 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -437,11 +437,8 @@ impl Aggregate for SettingsAnalytics { "Settings Updated" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { ranking_rules: RankingRulesAnalytics { words_position: self .ranking_rules @@ -586,14 +583,11 @@ impl Aggregate for SettingsAnalytics { non_separator_tokens: NonSeparatorTokensAnalytics { total: 
self.non_separator_tokens.total.or(other.non_separator_tokens.total), }, - } + }) } - fn into_event(self) -> impl Serialize - where - Self: Sized, - { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index abdffbb73..f7d8f4eff 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -39,12 +39,14 @@ impl Aggregate for IndexSwappedAnalytics { "Indexes Swapped" } - fn aggregate(self, other: Self) -> Self { - Self { swap_operation_number: self.swap_operation_number.max(other.swap_operation_number) } + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { + swap_operation_number: self.swap_operation_number.max(other.swap_operation_number), + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index f04e2ead2..ff4aee998 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -185,8 +185,8 @@ impl Aggregate for TaskFilterAnalytics Self { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { filtered_by_uid: self.filtered_by_uid | other.filtered_by_uid, filtered_by_index_uid: self.filtered_by_index_uid | other.filtered_by_index_uid, filtered_by_type: self.filtered_by_type | other.filtered_by_type, @@ -206,11 +206,11 @@ impl Aggregate for TaskFilterAnalytics impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } From 7382fb21e41719a6be6dbf5f25b6c47ad7afc581 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 08:38:11 +0200 Subject: [PATCH 58/92] fix the main --- meilisearch/src/analytics/mod.rs | 24 +++++++++++++------ .../src/analytics/segment_analytics.rs | 
10 ++++---- meilisearch/src/lib.rs | 6 ++--- meilisearch/src/main.rs | 22 +++++------------ meilisearch/src/routes/indexes/search.rs | 4 ++-- 5 files changed, 33 insertions(+), 33 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index b3e8109a3..91139e1dd 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -5,8 +5,11 @@ pub mod segment_analytics; use std::fs; use std::path::{Path, PathBuf}; use std::str::FromStr; +use std::sync::Arc; use actix_web::HttpRequest; +use index_scheduler::IndexScheduler; +use meilisearch_auth::AuthController; use meilisearch_types::InstanceUid; use mopa::mopafy; use once_cell::sync::Lazy; @@ -17,6 +20,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; pub use segment_analytics::SearchAggregator; pub use segment_analytics::SimilarAggregator; +use crate::Opt; + use self::segment_analytics::extract_user_agents; pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; @@ -137,17 +142,22 @@ macro_rules! 
aggregate_methods { }; } +#[derive(Clone)] pub struct Analytics { - segment: Option, + segment: Option>, } impl Analytics { - fn no_analytics() -> Self { - Self { segment: None } - } - - fn segment_analytics(segment: SegmentAnalytics) -> Self { - Self { segment: Some(segment) } + pub async fn new( + opt: &Opt, + index_scheduler: Arc, + auth_controller: Arc, + ) -> Self { + if opt.no_analytics { + Self { segment: None } + } else { + Self { segment: SegmentAnalytics::new(opt, index_scheduler, auth_controller).await } + } } pub fn instance_uid(&self) -> Option<&InstanceUid> { diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 92f03e48e..3496853ff 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -102,7 +102,7 @@ impl SegmentAnalytics { opt: &Opt, index_scheduler: Arc, auth_controller: Arc, - ) -> Arc { + ) -> Option> { let instance_uid = super::find_user_id(&opt.db_path); let first_time_run = instance_uid.is_none(); let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4); @@ -112,7 +112,7 @@ impl SegmentAnalytics { // if reqwest throws an error we won't be able to send analytics if client.is_err() { - return Arc::new(Analytics::no_analytics()); + return None; } let client = @@ -148,13 +148,13 @@ impl SegmentAnalytics { user: user.clone(), opt: opt.clone(), batcher, - events: todo!(), + events: HashMap::new(), }); tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone())); let this = Self { instance_uid, sender, user: user.clone() }; - Arc::new(Analytics::segment_analytics(this)) + Some(Arc::new(this)) } } @@ -595,7 +595,7 @@ pub struct SearchAggregator { impl SearchAggregator { #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self { + pub fn from_query(query: &SearchQuery) -> Self { let SearchQuery { q, vector, diff --git a/meilisearch/src/lib.rs 
b/meilisearch/src/lib.rs index 80177876a..633ad2776 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -120,7 +120,7 @@ pub fn create_app( search_queue: Data, opt: Opt, logs: (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Data, enable_dashboard: bool, ) -> actix_web::App< impl ServiceFactory< @@ -473,14 +473,14 @@ pub fn configure_data( search_queue: Data, opt: &Opt, (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Data, ) { let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; config .app_data(index_scheduler) .app_data(auth) .app_data(search_queue) - .app_data(web::Data::from(analytics)) + .app_data(analytics) .app_data(web::Data::new(logs_route)) .app_data(web::Data::new(logs_stderr)) .app_data(web::Data::new(opt.clone())) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index de9784d15..eebea3b6d 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -124,19 +124,12 @@ async fn try_main() -> anyhow::Result<()> { let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?; - #[cfg(all(not(debug_assertions), feature = "analytics"))] - let analytics = if !opt.no_analytics { - analytics::SegmentAnalytics::new(&opt, index_scheduler.clone(), auth_controller.clone()) - .await - } else { - analytics::MockAnalytics::new(&opt) - }; - #[cfg(any(debug_assertions, not(feature = "analytics")))] - let analytics = analytics::MockAnalytics::new(&opt); + let analytics = + analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await; print_launch_resume(&opt, analytics.clone(), config_read_from); - run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?; + run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?; Ok(()) } @@ -146,12 +139,13 @@ async fn run_http( auth_controller: Arc, opt: Opt, logs: (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: 
Arc, ) -> anyhow::Result<()> { let enable_dashboard = &opt.env == "development"; let opt_clone = opt.clone(); let index_scheduler = Data::from(index_scheduler); let auth_controller = Data::from(auth_controller); + let analytics = Data::from(analytics); let search_queue = SearchQueue::new( opt.experimental_search_queue_size, available_parallelism() @@ -187,11 +181,7 @@ async fn run_http( Ok(()) } -pub fn print_launch_resume( - opt: &Opt, - analytics: Arc, - config_read_from: Option, -) { +pub fn print_launch_resume(opt: &Opt, analytics: Analytics, config_read_from: Option) { let build_info = build_info::BuildInfo::from_build(); let protocol = diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 538c46fd0..ac6e23c8f 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -238,7 +238,7 @@ pub async fn search_with_url_query( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query); let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); @@ -281,7 +281,7 @@ pub async fn search_with_post( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query); let index = index_scheduler.index(&index_uid)?; From ef77c7699b21422b4857878d072494e1bfc49d6b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:06:23 +0200 Subject: [PATCH 59/92] add the required shared values between all the events and fix the timestamp --- meilisearch/src/analytics/mod.rs | 6 +- .../src/analytics/segment_analytics.rs | 75 +++++++++++++------ 2 files changed, 57 insertions(+), 24 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 91139e1dd..a3b8d6d1d 100644 --- 
a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -166,8 +166,8 @@ impl Analytics { /// The method used to publish most analytics that do not need to be batched every hours pub fn publish(&self, event: T, request: &HttpRequest) { - let Some(ref segment) = self.segment else { return }; - let user_agents = extract_user_agents(request); - let _ = segment.sender.try_send(segment_analytics::Message::new(event)); + if let Some(ref segment) = self.segment { + let _ = segment.sender.try_send(segment_analytics::Message::new(event, request)); + } } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 3496853ff..00a3adaaf 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -28,7 +28,6 @@ use super::{ config_user_id_path, Aggregate, AggregateMethod, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, }; -use crate::analytics::Analytics; use crate::option::{ default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot, }; @@ -58,7 +57,7 @@ fn write_user_id(db_path: &Path, user_id: &InstanceUid) { const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb"; -pub fn extract_user_agents(request: &HttpRequest) -> Vec { +pub fn extract_user_agents(request: &HttpRequest) -> HashSet { request .headers() .get(ANALYTICS_HEADER) @@ -77,14 +76,26 @@ pub struct Message { type_id: TypeId, // Same for the aggregate function. 
aggregator_function: fn(Box, Box) -> Option>, - event: Box, + event: Event, +} + +pub struct Event { + original: Box, + timestamp: OffsetDateTime, + user_agents: HashSet, + total: usize, } impl Message { - pub fn new(event: T) -> Self { + pub fn new(event: T, request: &HttpRequest) -> Self { Self { type_id: TypeId::of::(), - event: Box::new(event), + event: Event { + original: Box::new(event), + timestamp: OffsetDateTime::now_utc(), + user_agents: extract_user_agents(request), + total: 1, + }, aggregator_function: T::downcast_aggregate, } } @@ -400,7 +411,7 @@ pub struct Segment { user: User, opt: Opt, batcher: AutoBatcher, - events: HashMap>, + events: HashMap, } impl Segment { @@ -451,22 +462,34 @@ impl Segment { _ = interval.tick() => { self.tick(index_scheduler.clone(), auth_controller.clone()).await; }, - msg = self.inbox.recv() => { - match msg { - Some(Message { type_id, event, aggregator_function }) => { - let new_event = match self.events.remove(&type_id) { - Some(old) => (aggregator_function)(old, event).unwrap(), - None => event, - }; - self.events.insert(type_id, new_event); - }, - None => (), - } - } + Some(msg) = self.inbox.recv() => { + self.handle_msg(msg); + } } } } + fn handle_msg(&mut self, Message { type_id, aggregator_function, event }: Message) { + let new_event = match self.events.remove(&type_id) { + Some(old) => { + // The function should never fail since we retrieved the corresponding TypeId in the map. 
But in the unfortunate + // case it could happens we're going to silently ignore the error + let Some(original) = (aggregator_function)(old.original, event.original) else { + return; + }; + Event { + original, + // We always want to return the FIRST timestamp ever encountered + timestamp: old.timestamp, + user_agents: old.user_agents.union(&event.user_agents).cloned().collect(), + total: old.total.saturating_add(event.total), + } + } + None => event, + }; + self.events.insert(type_id, new_event); + } + async fn tick( &mut self, index_scheduler: Arc, @@ -503,11 +526,21 @@ impl Segment { let events = std::mem::take(&mut self.events); for (_, event) in events { + let Event { original, timestamp, user_agents, total } = event; + let name = original.event_name(); + let mut properties = original.into_event(); + if properties["user-agent"].is_null() { + properties["user-agent"] = json!(user_agents); + }; + if properties["requests"]["total_received"].is_null() { + properties["requests"]["total_received"] = total.into(); + }; + self.batcher.push(Track { user: self.user.clone(), - event: event.event_name().to_string(), - properties: event.into_event(), - timestamp: todo!(), + event: name.to_string(), + properties, + timestamp: Some(timestamp), ..Default::default() }); } From 4ee65d870eab55f0c5098aaad659aa98fbd9d500 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:14:34 +0200 Subject: [PATCH 60/92] remove a lot of ununsed code --- meilisearch/src/analytics/mod.rs | 4 +- .../src/analytics/segment_analytics.rs | 598 +----------------- .../src/routes/indexes/facet_search.rs | 1 - 3 files changed, 17 insertions(+), 586 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index a3b8d6d1d..d08f3307c 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -22,9 +22,7 @@ pub use segment_analytics::SimilarAggregator; use crate::Opt; -use self::segment_analytics::extract_user_agents; -pub type 
MultiSearchAggregator = segment_analytics::MultiSearchAggregator; -pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; +pub use self::segment_analytics::MultiSearchAggregator; /// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name. #[macro_export] diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 00a3adaaf..1edfa1bdd 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, Instant}; -use actix_web::http::header::{CONTENT_TYPE, USER_AGENT}; +use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; use byte_unit::Byte; use index_scheduler::IndexScheduler; @@ -24,21 +24,15 @@ use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; -use super::{ - config_user_id_path, Aggregate, AggregateMethod, DocumentDeletionKind, DocumentFetchKind, - MEILISEARCH_CONFIG_PATH, -}; +use super::{config_user_id_path, Aggregate, AggregateMethod, MEILISEARCH_CONFIG_PATH}; use crate::option::{ default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot, }; -use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; -use crate::routes::indexes::facet_search::FacetSearchQuery; use crate::routes::{create_all_stats, Stats}; use crate::search::{ - FacetSearchResult, FederatedSearch, MatchingStrategy, SearchQuery, SearchQueryWithIndex, - SearchResult, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, - DEFAULT_SEMANTIC_RATIO, + FederatedSearch, SearchQuery, SearchQueryWithIndex, SearchResult, SimilarQuery, SimilarResult, + DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, + 
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO, }; use crate::{aggregate_methods, Opt}; @@ -75,6 +69,7 @@ pub struct Message { // Thus we have to send it in the message directly. type_id: TypeId, // Same for the aggregate function. + #[allow(clippy::type_complexity)] aggregator_function: fn(Box, Box) -> Option>, event: Event, } @@ -169,97 +164,6 @@ impl SegmentAnalytics { } } -/* -impl super::Analytics for SegmentAnalytics { - fn instance_uid(&self) -> Option<&InstanceUid> { - Some(&self.instance_uid) - } - - fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) { - let user_agent = request.map(extract_user_agents); - - send["user-agent"] = json!(user_agent); - let event = Track { - user: self.user.clone(), - event: event_name.clone(), - properties: send, - ..Default::default() - }; - let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event)); - } - - fn get_search(&self, aggregate: SearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate)); - } - - fn post_search(&self, aggregate: SearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); - } - - fn get_similar(&self, aggregate: SimilarAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSimilar(aggregate)); - } - - fn post_similar(&self, aggregate: SimilarAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSimilar(aggregate)); - } - - fn post_facet_search(&self, aggregate: FacetSearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate)); - } - - fn post_multi_search(&self, aggregate: MultiSearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate)); - } - - fn add_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - let aggregate = DocumentsAggregator::from_query(documents_query, 
index_creation, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateAddDocuments(aggregate)); - } - - fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest) { - let aggregate = DocumentsDeletionAggregator::from_query(kind, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateDeleteDocuments(aggregate)); - } - - fn update_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); - } - - fn update_documents_by_function( - &self, - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ) { - let aggregate = - EditDocumentsByFunctionAggregator::from_query(documents_query, index_creation, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateEditDocumentsByFunction(aggregate)); - } - - fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { - let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate)); - } - - fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { - let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate)); - } -} -*/ - /// This structure represent the `infos` field we send in the analytics. /// It's quite close to the `Opt` structure except all sensitive informations /// have been simplified to a boolean. 
@@ -536,13 +440,16 @@ impl Segment { properties["requests"]["total_received"] = total.into(); }; - self.batcher.push(Track { - user: self.user.clone(), - event: name.to_string(), - properties, - timestamp: Some(timestamp), - ..Default::default() - }); + let _ = self + .batcher + .push(Track { + user: self.user.clone(), + event: name.to_string(), + properties, + timestamp: Some(timestamp), + ..Default::default() + }) + .await; } let _ = self.batcher.flush().await; @@ -1181,479 +1088,6 @@ impl Aggregate for MultiSearchAggregator { } } -#[derive(Default)] -pub struct FacetSearchAggregator { - timestamp: Option, - - // context - user_agents: HashSet, - - // requests - total_received: usize, - total_succeeded: usize, - time_spent: BinaryHeap, - - // The set of all facetNames that were used - facet_names: HashSet, - - // As there been any other parameter than the facetName or facetQuery ones? - additional_search_parameters_provided: bool, -} - -impl FacetSearchAggregator { - #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { - let FacetSearchQuery { - facet_query: _, - facet_name, - vector, - q, - filter, - matching_strategy, - attributes_to_search_on, - hybrid, - ranking_score_threshold, - locales, - } = query; - - let mut ret = Self::default(); - ret.timestamp = Some(OffsetDateTime::now_utc()); - - ret.total_received = 1; - ret.user_agents = extract_user_agents(request).into_iter().collect(); - ret.facet_names = Some(facet_name.clone()).into_iter().collect(); - - ret.additional_search_parameters_provided = q.is_some() - || vector.is_some() - || filter.is_some() - || *matching_strategy != MatchingStrategy::default() - || attributes_to_search_on.is_some() - || hybrid.is_some() - || ranking_score_threshold.is_some() - || locales.is_some(); - - ret - } - - pub fn succeed(&mut self, result: &FacetSearchResult) { - let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result; - 
self.total_succeeded = self.total_succeeded.saturating_add(1); - self.time_spent.push(*processing_time_ms as usize); - } - - /// Aggregate one [FacetSearchAggregator] into another. - pub fn aggregate(&mut self, mut other: Self) { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - ref mut time_spent, - facet_names, - additional_search_parameters_provided, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - - // request - self.total_received = self.total_received.saturating_add(total_received); - self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); - self.time_spent.append(time_spent); - - // facet_names - for facet_name in facet_names.into_iter() { - self.facet_names.insert(facet_name); - } - - // additional_search_parameters_provided - self.additional_search_parameters_provided |= additional_search_parameters_provided; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - time_spent, - facet_names, - additional_search_parameters_provided, - } = self; - - if total_received == 0 { - None - } else { - // the index of the 99th percentage of value - let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) 
+ 1.; - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th as usize); - - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "facets": { - "total_distinct_facet_count": facet_names.len(), - "additional_search_parameters_provided": additional_search_parameters_provided, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } - } -} - -#[derive(Default)] -pub struct DocumentsAggregator { - timestamp: Option, - - // set to true when at least one request was received - updated: bool, - - // context - user_agents: HashSet, - - content_types: HashSet, - primary_keys: HashSet, - index_creation: bool, -} - -impl DocumentsAggregator { - pub fn from_query( - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) -> Self { - let UpdateDocumentsQuery { primary_key, csv_delimiter: _ } = documents_query; - - let mut primary_keys = HashSet::new(); - if let Some(primary_key) = primary_key.clone() { - primary_keys.insert(primary_key); - } - - let mut content_types = HashSet::new(); - let content_type = request - .headers() - .get(CONTENT_TYPE) - .and_then(|s| s.to_str().ok()) - .unwrap_or("unknown") - .to_string(); - content_types.insert(content_type); - - Self { - timestamp: Some(OffsetDateTime::now_utc()), - updated: true, - user_agents: extract_user_agents(request).into_iter().collect(), - content_types, - primary_keys, - index_creation, - } - } - - /// Aggregate one [DocumentsAggregator] into another. 
- pub fn aggregate(&mut self, other: Self) { - let Self { timestamp, user_agents, primary_keys, content_types, index_creation, updated } = - other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - self.updated |= updated; - // we can't create a union because there is no `into_union` method - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - for primary_key in primary_keys { - self.primary_keys.insert(primary_key); - } - for content_type in content_types { - self.content_types.insert(content_type); - } - self.index_creation |= index_creation; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - let Self { timestamp, user_agents, primary_keys, content_types, index_creation, updated } = - self; - - if !updated { - None - } else { - let properties = json!({ - "user-agent": user_agents, - "payload_type": content_types, - "primary_key": primary_keys, - "index_creation": index_creation, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } - } -} - -#[derive(Default)] -pub struct EditDocumentsByFunctionAggregator { - timestamp: Option, - - // Set to true if at least one request was filtered - filtered: bool, - // Set to true if at least one request contained a context - with_context: bool, - - // context - user_agents: HashSet, - - index_creation: bool, -} - -impl EditDocumentsByFunctionAggregator { - pub fn from_query( - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ) -> Self { - let DocumentEditionByFunction { filter, context, function: _ } = documents_query; - - Self { - timestamp: Some(OffsetDateTime::now_utc()), - user_agents: extract_user_agents(request).into_iter().collect(), - filtered: filter.is_some(), - with_context: context.is_some(), - index_creation, - } - } - - /// Aggregate one [DocumentsAggregator] into another. 
- pub fn aggregate(&mut self, other: Self) { - let Self { timestamp, user_agents, index_creation, filtered, with_context } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // we can't create a union because there is no `into_union` method - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - self.index_creation |= index_creation; - self.filtered |= filtered; - self.with_context |= with_context; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - let Self { timestamp, user_agents, index_creation, filtered, with_context } = self; - - // if we had no timestamp it means we never encountered any events and - // thus we don't need to send this event. - let timestamp = timestamp?; - - let properties = json!({ - "user-agent": user_agents, - "filtered": filtered, - "with_context": with_context, - "index_creation": index_creation, - }); - - Some(Track { - timestamp: Some(timestamp), - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } -} - -#[derive(Default, Serialize)] -pub struct DocumentsDeletionAggregator { - #[serde(skip)] - timestamp: Option, - - // context - #[serde(rename = "user-agent")] - user_agents: HashSet, - - #[serde(rename = "requests.total_received")] - total_received: usize, - per_document_id: bool, - clear_all: bool, - per_batch: bool, - per_filter: bool, -} - -impl DocumentsDeletionAggregator { - pub fn from_query(kind: DocumentDeletionKind, request: &HttpRequest) -> Self { - Self { - timestamp: Some(OffsetDateTime::now_utc()), - user_agents: extract_user_agents(request).into_iter().collect(), - total_received: 1, - per_document_id: matches!(kind, DocumentDeletionKind::PerDocumentId), - clear_all: matches!(kind, DocumentDeletionKind::ClearAll), - per_batch: matches!(kind, DocumentDeletionKind::PerBatch), - per_filter: matches!(kind, DocumentDeletionKind::PerFilter), - } - } - - /// Aggregate one [DocumentsAggregator] into 
another. - pub fn aggregate(&mut self, other: Self) { - let Self { - timestamp, - user_agents, - total_received, - per_document_id, - clear_all, - per_batch, - per_filter, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // we can't create a union because there is no `into_union` method - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - self.total_received = self.total_received.saturating_add(total_received); - self.per_document_id |= per_document_id; - self.clear_all |= clear_all; - self.per_batch |= per_batch; - self.per_filter |= per_filter; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - // if we had no timestamp it means we never encountered any events and - // thus we don't need to send this event. - let timestamp = self.timestamp?; - - Some(Track { - timestamp: Some(timestamp), - user: user.clone(), - event: event_name.to_string(), - properties: serde_json::to_value(self).ok()?, - ..Default::default() - }) - } -} - -#[derive(Default, Serialize)] -pub struct DocumentsFetchAggregator { - #[serde(skip)] - timestamp: Option, - - // context - #[serde(rename = "user-agent")] - user_agents: HashSet, - - #[serde(rename = "requests.total_received")] - total_received: usize, - - // a call on ../documents/:doc_id - per_document_id: bool, - // if a filter was used - per_filter: bool, - - #[serde(rename = "vector.retrieve_vectors")] - retrieve_vectors: bool, - - // pagination - #[serde(rename = "pagination.max_limit")] - max_limit: usize, - #[serde(rename = "pagination.max_offset")] - max_offset: usize, -} - -impl DocumentsFetchAggregator { - pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self { - let (limit, offset, retrieve_vectors) = match query { - DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), - DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. 
} => { - (*limit, *offset, *retrieve_vectors) - } - }; - Self { - timestamp: Some(OffsetDateTime::now_utc()), - user_agents: extract_user_agents(request).into_iter().collect(), - total_received: 1, - per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), - per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), - max_limit: limit, - max_offset: offset, - retrieve_vectors, - } - } - - /// Aggregate one [DocumentsFetchAggregator] into another. - pub fn aggregate(&mut self, other: Self) { - let Self { - timestamp, - user_agents, - total_received, - per_document_id, - per_filter, - max_limit, - max_offset, - retrieve_vectors, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - - self.total_received = self.total_received.saturating_add(total_received); - self.per_document_id |= per_document_id; - self.per_filter |= per_filter; - - self.max_limit = self.max_limit.max(max_limit); - self.max_offset = self.max_offset.max(max_offset); - - self.retrieve_vectors |= retrieve_vectors; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - // if we had no timestamp it means we never encountered any events and - // thus we don't need to send this event. 
- let timestamp = self.timestamp?; - - Some(Track { - timestamp: Some(timestamp), - user: user.clone(), - event: event_name.to_string(), - properties: serde_json::to_value(self).ok()?, - ..Default::default() - }) - } -} - aggregate_methods!( SimilarPOST => "Similar POST", SimilarGET => "Similar GET", diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 715eaaaa7..8e40397c7 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -9,7 +9,6 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::locales::Locale; -use serde::Serialize; use serde_json::Value; use tracing::debug; From 0fde49640a3f76cce57414e88b6690aa90ff8523 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:18:25 +0200 Subject: [PATCH 61/92] make clippy happy --- meilisearch/src/main.rs | 1 - meilisearch/src/routes/indexes/settings.rs | 111 ++++++++------------- 2 files changed, 43 insertions(+), 69 deletions(-) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index eebea3b6d..c0652bf1e 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -223,7 +223,6 @@ pub fn print_launch_resume(opt: &Opt, analytics: Analytics, config_read_from: Op eprintln!("Prototype:\t\t{:?}", prototype); } - #[cfg(all(not(debug_assertions), feature = "analytics"))] { if !opt.no_analytics { eprintln!( diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index f31f52dc1..745ad5c78 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -94,7 +94,7 @@ macro_rules! 
make_setting_route { #[allow(clippy::redundant_closure_call)] analytics.publish( - $crate::routes::indexes::settings::$analytics::new(body.as_ref()).to_settings(), + $crate::routes::indexes::settings::$analytics::new(body.as_ref()).into_settings(), &req, ); @@ -605,58 +605,33 @@ struct RankingRulesAnalytics { impl RankingRulesAnalytics { pub fn new(rr: Option<&Vec>) -> Self { RankingRulesAnalytics { - words_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Words) - }) + words_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) + }), + typo_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) + }), + proximity_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) }) - .flatten(), - - typo_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Typo) - }) + }), + attribute_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) }) - .flatten(), - - proximity_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) - }) + }), + sort_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) + }), + exactness_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) }) - .flatten(), - - attribute_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) - }) - }) - .flatten(), - sort_position: rr - .as_ref() 
- .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Sort) - }) - }) - .flatten(), - exactness_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) - }) - }) - .flatten(), - + }), values: rr.as_ref().map(|rr| { rr.iter() .filter(|s| { @@ -673,7 +648,7 @@ impl RankingRulesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { ranking_rules: self, ..Default::default() } } } @@ -694,7 +669,7 @@ impl SearchableAttributesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { searchable_attributes: self, ..Default::default() } } } @@ -715,7 +690,7 @@ impl DisplayedAttributesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { displayed_attributes: self, ..Default::default() } } } @@ -734,7 +709,7 @@ impl SortableAttributesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { sortable_attributes: self, ..Default::default() } } } @@ -753,7 +728,7 @@ impl FilterableAttributesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { filterable_attributes: self, ..Default::default() } } } @@ -768,7 +743,7 @@ impl DistinctAttributeAnalytics { Self { set: distinct.is_some() } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { distinct_attribute: self, ..Default::default() } } } @@ -784,7 +759,7 @@ impl ProximityPrecisionAnalytics { Self { set: precision.is_some(), value: precision.cloned() } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn 
into_settings(self) -> SettingsAnalytics { SettingsAnalytics { proximity_precision: self, ..Default::default() } } } @@ -818,7 +793,7 @@ impl TypoToleranceAnalytics { .flatten(), } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { typo_tolerance: self, ..Default::default() } } } @@ -846,7 +821,7 @@ impl FacetingAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { faceting: self, ..Default::default() } } } @@ -861,7 +836,7 @@ impl PaginationAnalytics { Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { pagination: self, ..Default::default() } } } @@ -876,7 +851,7 @@ impl StopWordsAnalytics { Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { stop_words: self, ..Default::default() } } } @@ -891,7 +866,7 @@ impl SynonymsAnalytics { Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { synonyms: self, ..Default::default() } } } @@ -960,7 +935,7 @@ impl EmbeddersAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { embedders: self, ..Default::default() } } } @@ -976,7 +951,7 @@ impl SearchCutoffMsAnalytics { Self { search_cutoff_ms: setting.copied() } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } } } @@ -1001,7 +976,7 @@ impl LocalesAnalytics { } } - pub fn to_settings(self) -> 
SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { locales: self, ..Default::default() } } } @@ -1016,7 +991,7 @@ impl DictionaryAnalytics { Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { dictionary: self, ..Default::default() } } } @@ -1031,7 +1006,7 @@ impl SeparatorTokensAnalytics { Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { separator_tokens: self, ..Default::default() } } } @@ -1050,7 +1025,7 @@ impl NonSeparatorTokensAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { non_separator_tokens: self, ..Default::default() } } } From d9115b74f09118b3bc687f9c0853bb74469b0d87 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:32:54 +0200 Subject: [PATCH 62/92] move the analytics settings code to a dedicated file --- meilisearch/src/routes/indexes/mod.rs | 1 + meilisearch/src/routes/indexes/settings.rs | 634 +----------------- .../src/routes/indexes/settings_analytics.rs | 627 +++++++++++++++++ 3 files changed, 632 insertions(+), 630 deletions(-) create mode 100644 meilisearch/src/routes/indexes/settings_analytics.rs diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 8972119d7..65c81a57e 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -29,6 +29,7 @@ pub mod documents; pub mod facet_search; pub mod search; pub mod settings; +mod settings_analytics; pub mod similar; pub fn configure(cfg: &mut web::ServiceConfig) { diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 
745ad5c78..bca763a99 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -1,23 +1,17 @@ -use std::collections::{BTreeSet, HashSet}; - +use super::settings_analytics::*; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; -use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::index_uid::IndexUid; -use meilisearch_types::locales::Locale; use meilisearch_types::milli::update::Setting; -use meilisearch_types::settings::{ - settings, ProximityPrecisionView, RankingRuleView, SecretPolicy, Settings, Unchecked, -}; +use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked}; use meilisearch_types::tasks::KindWithContent; -use serde::Serialize; use tracing::debug; -use crate::analytics::{Aggregate, Analytics}; +use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; @@ -94,7 +88,7 @@ macro_rules! 
make_setting_route { #[allow(clippy::redundant_closure_call)] analytics.publish( - $crate::routes::indexes::settings::$analytics::new(body.as_ref()).into_settings(), + $crate::routes::indexes::settings_analytics::$analytics::new(body.as_ref()).into_settings(), &req, ); @@ -410,626 +404,6 @@ generate_configure!( search_cutoff_ms ); -#[derive(Serialize, Default)] -struct SettingsAnalytics { - ranking_rules: RankingRulesAnalytics, - searchable_attributes: SearchableAttributesAnalytics, - displayed_attributes: DisplayedAttributesAnalytics, - sortable_attributes: SortableAttributesAnalytics, - filterable_attributes: FilterableAttributesAnalytics, - distinct_attribute: DistinctAttributeAnalytics, - proximity_precision: ProximityPrecisionAnalytics, - typo_tolerance: TypoToleranceAnalytics, - faceting: FacetingAnalytics, - pagination: PaginationAnalytics, - stop_words: StopWordsAnalytics, - synonyms: SynonymsAnalytics, - embedders: EmbeddersAnalytics, - search_cutoff_ms: SearchCutoffMsAnalytics, - locales: LocalesAnalytics, - dictionary: DictionaryAnalytics, - separator_tokens: SeparatorTokensAnalytics, - non_separator_tokens: NonSeparatorTokensAnalytics, -} - -impl Aggregate for SettingsAnalytics { - fn event_name(&self) -> &'static str { - "Settings Updated" - } - - fn aggregate(self: Box, other: Box) -> Box { - Box::new(Self { - ranking_rules: RankingRulesAnalytics { - words_position: self - .ranking_rules - .words_position - .or(other.ranking_rules.words_position), - typo_position: self - .ranking_rules - .typo_position - .or(other.ranking_rules.typo_position), - proximity_position: self - .ranking_rules - .proximity_position - .or(other.ranking_rules.proximity_position), - attribute_position: self - .ranking_rules - .attribute_position - .or(other.ranking_rules.attribute_position), - sort_position: self - .ranking_rules - .sort_position - .or(other.ranking_rules.sort_position), - exactness_position: self - .ranking_rules - .exactness_position - 
.or(other.ranking_rules.exactness_position), - values: self.ranking_rules.values.or(other.ranking_rules.values), - }, - searchable_attributes: SearchableAttributesAnalytics { - total: self.searchable_attributes.total.or(other.searchable_attributes.total), - with_wildcard: self - .searchable_attributes - .with_wildcard - .or(other.searchable_attributes.with_wildcard), - }, - displayed_attributes: DisplayedAttributesAnalytics { - total: self.displayed_attributes.total.or(other.displayed_attributes.total), - with_wildcard: self - .displayed_attributes - .with_wildcard - .or(other.displayed_attributes.with_wildcard), - }, - sortable_attributes: SortableAttributesAnalytics { - total: self.sortable_attributes.total.or(other.sortable_attributes.total), - has_geo: self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), - }, - filterable_attributes: FilterableAttributesAnalytics { - total: self.filterable_attributes.total.or(other.filterable_attributes.total), - has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), - }, - distinct_attribute: DistinctAttributeAnalytics { - set: self.distinct_attribute.set | other.distinct_attribute.set, - }, - proximity_precision: ProximityPrecisionAnalytics { - set: self.proximity_precision.set | other.proximity_precision.set, - value: self.proximity_precision.value.or(other.proximity_precision.value), - }, - typo_tolerance: TypoToleranceAnalytics { - enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), - disable_on_attributes: self - .typo_tolerance - .disable_on_attributes - .or(other.typo_tolerance.disable_on_attributes), - disable_on_words: self - .typo_tolerance - .disable_on_words - .or(other.typo_tolerance.disable_on_words), - min_word_size_for_one_typo: self - .typo_tolerance - .min_word_size_for_one_typo - .or(other.typo_tolerance.min_word_size_for_one_typo), - min_word_size_for_two_typos: self - .typo_tolerance - .min_word_size_for_two_typos - 
.or(other.typo_tolerance.min_word_size_for_two_typos), - }, - faceting: FacetingAnalytics { - max_values_per_facet: self - .faceting - .max_values_per_facet - .or(other.faceting.max_values_per_facet), - sort_facet_values_by_star_count: self - .faceting - .sort_facet_values_by_star_count - .or(other.faceting.sort_facet_values_by_star_count), - sort_facet_values_by_total: self - .faceting - .sort_facet_values_by_total - .or(other.faceting.sort_facet_values_by_total), - }, - pagination: PaginationAnalytics { - max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), - }, - stop_words: StopWordsAnalytics { - total: self.stop_words.total.or(other.stop_words.total), - }, - synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, - embedders: EmbeddersAnalytics { - total: self.embedders.total.or(other.embedders.total), - sources: match (self.embedders.sources, other.embedders.sources) { - (None, None) => None, - (Some(sources), None) | (None, Some(sources)) => Some(sources), - (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), - }, - document_template_used: match ( - self.embedders.document_template_used, - other.embedders.document_template_used, - ) { - (None, None) => None, - (Some(used), None) | (None, Some(used)) => Some(used), - (Some(this), Some(other)) => Some(this | other), - }, - document_template_max_bytes: match ( - self.embedders.document_template_max_bytes, - other.embedders.document_template_max_bytes, - ) { - (None, None) => None, - (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), - (Some(this), Some(other)) => Some(this.max(other)), - }, - binary_quantization_used: match ( - self.embedders.binary_quantization_used, - other.embedders.binary_quantization_used, - ) { - (None, None) => None, - (Some(bq), None) | (None, Some(bq)) => Some(bq), - (Some(this), Some(other)) => Some(this | other), - }, - }, - search_cutoff_ms: SearchCutoffMsAnalytics { - search_cutoff_ms: self - 
.search_cutoff_ms - .search_cutoff_ms - .or(other.search_cutoff_ms.search_cutoff_ms), - }, - locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) }, - dictionary: DictionaryAnalytics { - total: self.dictionary.total.or(other.dictionary.total), - }, - separator_tokens: SeparatorTokensAnalytics { - total: self.separator_tokens.total.or(other.non_separator_tokens.total), - }, - non_separator_tokens: NonSeparatorTokensAnalytics { - total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), - }, - }) - } - - fn into_event(self: Box) -> serde_json::Value { - serde_json::to_value(*self).unwrap_or_default() - } -} - -#[derive(Serialize, Default)] -struct RankingRulesAnalytics { - words_position: Option, - typo_position: Option, - proximity_position: Option, - attribute_position: Option, - sort_position: Option, - exactness_position: Option, - values: Option, -} - -impl RankingRulesAnalytics { - pub fn new(rr: Option<&Vec>) -> Self { - RankingRulesAnalytics { - words_position: rr.as_ref().and_then(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) - }), - typo_position: rr.as_ref().and_then(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) - }), - proximity_position: rr.as_ref().and_then(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) - }) - }), - attribute_position: rr.as_ref().and_then(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) - }) - }), - sort_position: rr.as_ref().and_then(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) - }), - exactness_position: rr.as_ref().and_then(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) - }) - }), - values: rr.as_ref().map(|rr| { - rr.iter() - .filter(|s| { - matches!( - 
s, - meilisearch_types::settings::RankingRuleView::Asc(_) - | meilisearch_types::settings::RankingRuleView::Desc(_) - ) - }) - .map(|x| x.to_string()) - .collect::>() - .join(", ") - }), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { ranking_rules: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SearchableAttributesAnalytics { - total: Option, - with_wildcard: Option, -} - -impl SearchableAttributesAnalytics { - pub fn new(setting: Option<&Vec>) -> Self { - Self { - total: setting.as_ref().map(|searchable| searchable.len()), - with_wildcard: setting - .as_ref() - .map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { searchable_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct DisplayedAttributesAnalytics { - total: Option, - with_wildcard: Option, -} - -impl DisplayedAttributesAnalytics { - pub fn new(displayed: Option<&Vec>) -> Self { - Self { - total: displayed.as_ref().map(|displayed| displayed.len()), - with_wildcard: displayed - .as_ref() - .map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { displayed_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SortableAttributesAnalytics { - total: Option, - has_geo: Option, -} - -impl SortableAttributesAnalytics { - pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { - Self { - total: setting.as_ref().map(|sort| sort.len()), - has_geo: setting.as_ref().map(|sort| sort.contains("_geo")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { sortable_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct FilterableAttributesAnalytics { - total: Option, - has_geo: Option, -} - -impl 
FilterableAttributesAnalytics { - pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { - Self { - total: setting.as_ref().map(|filter| filter.len()), - has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { filterable_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct DistinctAttributeAnalytics { - set: bool, -} - -impl DistinctAttributeAnalytics { - pub fn new(distinct: Option<&String>) -> Self { - Self { set: distinct.is_some() } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { distinct_attribute: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct ProximityPrecisionAnalytics { - set: bool, - value: Option, -} - -impl ProximityPrecisionAnalytics { - pub fn new(precision: Option<&meilisearch_types::settings::ProximityPrecisionView>) -> Self { - Self { set: precision.is_some(), value: precision.cloned() } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { proximity_precision: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct TypoToleranceAnalytics { - enabled: Option, - disable_on_attributes: Option, - disable_on_words: Option, - min_word_size_for_one_typo: Option, - min_word_size_for_two_typos: Option, -} - -impl TypoToleranceAnalytics { - pub fn new(setting: Option<&meilisearch_types::settings::TypoSettings>) -> Self { - Self { - enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), - disable_on_attributes: setting - .as_ref() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - disable_on_words: setting - .as_ref() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - min_word_size_for_one_typo: setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set())) - .flatten(), - 
min_word_size_for_two_typos: setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set())) - .flatten(), - } - } - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { typo_tolerance: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct FacetingAnalytics { - max_values_per_facet: Option, - sort_facet_values_by_star_count: Option, - sort_facet_values_by_total: Option, -} - -impl FacetingAnalytics { - pub fn new(setting: Option<&meilisearch_types::settings::FacetingSettings>) -> Self { - Self { - max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()), - sort_facet_values_by_star_count: setting.as_ref().and_then(|s| { - s.sort_facet_values_by - .as_ref() - .set() - .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - sort_facet_values_by_total: setting - .as_ref() - .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { faceting: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct PaginationAnalytics { - max_total_hits: Option, -} - -impl PaginationAnalytics { - pub fn new(setting: Option<&meilisearch_types::settings::PaginationSettings>) -> Self { - Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { pagination: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct StopWordsAnalytics { - total: Option, -} - -impl StopWordsAnalytics { - pub fn new(stop_words: Option<&BTreeSet>) -> Self { - Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { stop_words: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SynonymsAnalytics { - total: Option, -} - -impl 
SynonymsAnalytics { - pub fn new(synonyms: Option<&std::collections::BTreeMap>>) -> Self { - Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { synonyms: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct EmbeddersAnalytics { - // last - total: Option, - // Merge the sources - sources: Option>, - // |= - document_template_used: Option, - // max - document_template_max_bytes: Option, - // |= - binary_quantization_used: Option, -} - -impl EmbeddersAnalytics { - pub fn new( - setting: Option< - &std::collections::BTreeMap< - String, - Setting, - >, - >, - ) -> Self { - let mut sources = std::collections::HashSet::new(); - - if let Some(s) = &setting { - for source in s - .values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.source.set()) - { - use meilisearch_types::milli::vector::settings::EmbedderSource; - match source { - EmbedderSource::OpenAi => sources.insert("openAi".to_string()), - EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()), - EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), - EmbedderSource::Ollama => sources.insert("ollama".to_string()), - EmbedderSource::Rest => sources.insert("rest".to_string()), - }; - } - }; - - Self { - total: setting.as_ref().map(|s| s.len()), - sources: Some(sources), - document_template_used: setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.document_template.set().is_some()) - }), - document_template_max_bytes: setting.as_ref().and_then(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.document_template_max_bytes.set()) - .max() - }), - binary_quantization_used: setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.binary_quantized.set().is_some()) - }), - } - } - 
- pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { embedders: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -#[serde(transparent)] -struct SearchCutoffMsAnalytics { - search_cutoff_ms: Option, -} - -impl SearchCutoffMsAnalytics { - pub fn new(setting: Option<&u64>) -> Self { - Self { search_cutoff_ms: setting.copied() } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -#[serde(transparent)] -struct LocalesAnalytics { - locales: Option>, -} - -impl LocalesAnalytics { - pub fn new( - rules: Option<&Vec>, - ) -> Self { - LocalesAnalytics { - locales: rules.as_ref().map(|rules| { - rules - .iter() - .flat_map(|rule| rule.locales.iter().cloned()) - .collect::>() - }), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { locales: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct DictionaryAnalytics { - total: Option, -} - -impl DictionaryAnalytics { - pub fn new(dictionary: Option<&std::collections::BTreeSet>) -> Self { - Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { dictionary: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SeparatorTokensAnalytics { - total: Option, -} - -impl SeparatorTokensAnalytics { - pub fn new(separator_tokens: Option<&std::collections::BTreeSet>) -> Self { - Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { separator_tokens: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct NonSeparatorTokensAnalytics { - total: Option, -} - -impl NonSeparatorTokensAnalytics { - pub fn new(non_separator_tokens: Option<&std::collections::BTreeSet>) -> Self { - Self { - 
total: non_separator_tokens - .as_ref() - .map(|non_separator_tokens| non_separator_tokens.len()), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { non_separator_tokens: self, ..Default::default() } - } -} - pub async fn update_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, diff --git a/meilisearch/src/routes/indexes/settings_analytics.rs b/meilisearch/src/routes/indexes/settings_analytics.rs new file mode 100644 index 000000000..636ef3c57 --- /dev/null +++ b/meilisearch/src/routes/indexes/settings_analytics.rs @@ -0,0 +1,627 @@ +//! All the structures used to make the analytics on the settings works. +//! The signatures of the `new` functions are not very rust idiomatic because they must match the types received +//! through the sub-settings route directly without any manipulation. +//! This is why we often use a `Option<&Vec<_>>` instead of a `Option<&[_]>`. + +use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; +use meilisearch_types::milli::update::Setting; +use meilisearch_types::milli::vector::settings::EmbeddingSettings; +use meilisearch_types::settings::{ + FacetingSettings, PaginationSettings, ProximityPrecisionView, TypoSettings, +}; +use meilisearch_types::{facet_values_sort::FacetValuesSort, settings::RankingRuleView}; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet, HashSet}; + +use crate::analytics::Aggregate; + +#[derive(Serialize, Default)] +pub struct SettingsAnalytics { + pub ranking_rules: RankingRulesAnalytics, + pub searchable_attributes: SearchableAttributesAnalytics, + pub displayed_attributes: DisplayedAttributesAnalytics, + pub sortable_attributes: SortableAttributesAnalytics, + pub filterable_attributes: FilterableAttributesAnalytics, + pub distinct_attribute: DistinctAttributeAnalytics, + pub proximity_precision: ProximityPrecisionAnalytics, + pub typo_tolerance: TypoToleranceAnalytics, + pub faceting: FacetingAnalytics, + pub pagination: 
PaginationAnalytics, + pub stop_words: StopWordsAnalytics, + pub synonyms: SynonymsAnalytics, + pub embedders: EmbeddersAnalytics, + pub search_cutoff_ms: SearchCutoffMsAnalytics, + pub locales: LocalesAnalytics, + pub dictionary: DictionaryAnalytics, + pub separator_tokens: SeparatorTokensAnalytics, + pub non_separator_tokens: NonSeparatorTokensAnalytics, +} + +impl Aggregate for SettingsAnalytics { + fn event_name(&self) -> &'static str { + "Settings Updated" + } + + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { + ranking_rules: RankingRulesAnalytics { + words_position: self + .ranking_rules + .words_position + .or(other.ranking_rules.words_position), + typo_position: self + .ranking_rules + .typo_position + .or(other.ranking_rules.typo_position), + proximity_position: self + .ranking_rules + .proximity_position + .or(other.ranking_rules.proximity_position), + attribute_position: self + .ranking_rules + .attribute_position + .or(other.ranking_rules.attribute_position), + sort_position: self + .ranking_rules + .sort_position + .or(other.ranking_rules.sort_position), + exactness_position: self + .ranking_rules + .exactness_position + .or(other.ranking_rules.exactness_position), + values: self.ranking_rules.values.or(other.ranking_rules.values), + }, + searchable_attributes: SearchableAttributesAnalytics { + total: self.searchable_attributes.total.or(other.searchable_attributes.total), + with_wildcard: self + .searchable_attributes + .with_wildcard + .or(other.searchable_attributes.with_wildcard), + }, + displayed_attributes: DisplayedAttributesAnalytics { + total: self.displayed_attributes.total.or(other.displayed_attributes.total), + with_wildcard: self + .displayed_attributes + .with_wildcard + .or(other.displayed_attributes.with_wildcard), + }, + sortable_attributes: SortableAttributesAnalytics { + total: self.sortable_attributes.total.or(other.sortable_attributes.total), + has_geo: 
self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), + }, + filterable_attributes: FilterableAttributesAnalytics { + total: self.filterable_attributes.total.or(other.filterable_attributes.total), + has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), + }, + distinct_attribute: DistinctAttributeAnalytics { + set: self.distinct_attribute.set | other.distinct_attribute.set, + }, + proximity_precision: ProximityPrecisionAnalytics { + set: self.proximity_precision.set | other.proximity_precision.set, + value: self.proximity_precision.value.or(other.proximity_precision.value), + }, + typo_tolerance: TypoToleranceAnalytics { + enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), + disable_on_attributes: self + .typo_tolerance + .disable_on_attributes + .or(other.typo_tolerance.disable_on_attributes), + disable_on_words: self + .typo_tolerance + .disable_on_words + .or(other.typo_tolerance.disable_on_words), + min_word_size_for_one_typo: self + .typo_tolerance + .min_word_size_for_one_typo + .or(other.typo_tolerance.min_word_size_for_one_typo), + min_word_size_for_two_typos: self + .typo_tolerance + .min_word_size_for_two_typos + .or(other.typo_tolerance.min_word_size_for_two_typos), + }, + faceting: FacetingAnalytics { + max_values_per_facet: self + .faceting + .max_values_per_facet + .or(other.faceting.max_values_per_facet), + sort_facet_values_by_star_count: self + .faceting + .sort_facet_values_by_star_count + .or(other.faceting.sort_facet_values_by_star_count), + sort_facet_values_by_total: self + .faceting + .sort_facet_values_by_total + .or(other.faceting.sort_facet_values_by_total), + }, + pagination: PaginationAnalytics { + max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), + }, + stop_words: StopWordsAnalytics { + total: self.stop_words.total.or(other.stop_words.total), + }, + synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, + 
embedders: EmbeddersAnalytics { + total: self.embedders.total.or(other.embedders.total), + sources: match (self.embedders.sources, other.embedders.sources) { + (None, None) => None, + (Some(sources), None) | (None, Some(sources)) => Some(sources), + (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), + }, + document_template_used: match ( + self.embedders.document_template_used, + other.embedders.document_template_used, + ) { + (None, None) => None, + (Some(used), None) | (None, Some(used)) => Some(used), + (Some(this), Some(other)) => Some(this | other), + }, + document_template_max_bytes: match ( + self.embedders.document_template_max_bytes, + other.embedders.document_template_max_bytes, + ) { + (None, None) => None, + (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), + (Some(this), Some(other)) => Some(this.max(other)), + }, + binary_quantization_used: match ( + self.embedders.binary_quantization_used, + other.embedders.binary_quantization_used, + ) { + (None, None) => None, + (Some(bq), None) | (None, Some(bq)) => Some(bq), + (Some(this), Some(other)) => Some(this | other), + }, + }, + search_cutoff_ms: SearchCutoffMsAnalytics { + search_cutoff_ms: self + .search_cutoff_ms + .search_cutoff_ms + .or(other.search_cutoff_ms.search_cutoff_ms), + }, + locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) }, + dictionary: DictionaryAnalytics { + total: self.dictionary.total.or(other.dictionary.total), + }, + separator_tokens: SeparatorTokensAnalytics { + total: self.separator_tokens.total.or(other.non_separator_tokens.total), + }, + non_separator_tokens: NonSeparatorTokensAnalytics { + total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), + }, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + +#[derive(Serialize, Default)] +pub struct RankingRulesAnalytics { + pub words_position: Option, + pub typo_position: Option, 
+ pub proximity_position: Option, + pub attribute_position: Option, + pub sort_position: Option, + pub exactness_position: Option, + pub values: Option, +} + +impl RankingRulesAnalytics { + pub fn new(rr: Option<&Vec>) -> Self { + RankingRulesAnalytics { + words_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) + }), + typo_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) + }), + proximity_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) + }) + }), + attribute_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) + }) + }), + sort_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) + }), + exactness_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) + }) + }), + values: rr.as_ref().map(|rr| { + rr.iter() + .filter(|s| { + matches!( + s, + meilisearch_types::settings::RankingRuleView::Asc(_) + | meilisearch_types::settings::RankingRuleView::Desc(_) + ) + }) + .map(|x| x.to_string()) + .collect::>() + .join(", ") + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { ranking_rules: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SearchableAttributesAnalytics { + pub total: Option, + pub with_wildcard: Option, +} + +impl SearchableAttributesAnalytics { + pub fn new(setting: Option<&Vec>) -> Self { + Self { + total: setting.as_ref().map(|searchable| searchable.len()), + with_wildcard: setting + .as_ref() + .map(|searchable| searchable.iter().any(|searchable| searchable == "*")), + } + } + + pub fn 
into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { searchable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DisplayedAttributesAnalytics { + pub total: Option, + pub with_wildcard: Option, +} + +impl DisplayedAttributesAnalytics { + pub fn new(displayed: Option<&Vec>) -> Self { + Self { + total: displayed.as_ref().map(|displayed| displayed.len()), + with_wildcard: displayed + .as_ref() + .map(|displayed| displayed.iter().any(|displayed| displayed == "*")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { displayed_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SortableAttributesAnalytics { + pub total: Option, + pub has_geo: Option, +} + +impl SortableAttributesAnalytics { + pub fn new(setting: Option<&BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|sort| sort.len()), + has_geo: setting.as_ref().map(|sort| sort.contains("_geo")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { sortable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct FilterableAttributesAnalytics { + pub total: Option, + pub has_geo: Option, +} + +impl FilterableAttributesAnalytics { + pub fn new(setting: Option<&BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|filter| filter.len()), + has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { filterable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DistinctAttributeAnalytics { + pub set: bool, +} + +impl DistinctAttributeAnalytics { + pub fn new(distinct: Option<&String>) -> Self { + Self { set: distinct.is_some() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { distinct_attribute: self, ..Default::default() } + } +} + 
+#[derive(Serialize, Default)] +pub struct ProximityPrecisionAnalytics { + pub set: bool, + pub value: Option, +} + +impl ProximityPrecisionAnalytics { + pub fn new(precision: Option<&ProximityPrecisionView>) -> Self { + Self { set: precision.is_some(), value: precision.cloned() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { proximity_precision: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct TypoToleranceAnalytics { + pub enabled: Option, + pub disable_on_attributes: Option, + pub disable_on_words: Option, + pub min_word_size_for_one_typo: Option, + pub min_word_size_for_two_typos: Option, +} + +impl TypoToleranceAnalytics { + pub fn new(setting: Option<&TypoSettings>) -> Self { + Self { + enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), + disable_on_attributes: setting + .as_ref() + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), + disable_on_words: setting + .as_ref() + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), + min_word_size_for_one_typo: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set())) + .flatten(), + min_word_size_for_two_typos: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set())) + .flatten(), + } + } + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { typo_tolerance: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct FacetingAnalytics { + pub max_values_per_facet: Option, + pub sort_facet_values_by_star_count: Option, + pub sort_facet_values_by_total: Option, +} + +impl FacetingAnalytics { + pub fn new(setting: Option<&FacetingSettings>) -> Self { + Self { + max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()), + sort_facet_values_by_star_count: setting.as_ref().and_then(|s| { + s.sort_facet_values_by + .as_ref() + 
.set() + .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) + }), + sort_facet_values_by_total: setting + .as_ref() + .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { faceting: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct PaginationAnalytics { + pub max_total_hits: Option, +} + +impl PaginationAnalytics { + pub fn new(setting: Option<&PaginationSettings>) -> Self { + Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { pagination: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct StopWordsAnalytics { + pub total: Option, +} + +impl StopWordsAnalytics { + pub fn new(stop_words: Option<&BTreeSet>) -> Self { + Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { stop_words: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SynonymsAnalytics { + pub total: Option, +} + +impl SynonymsAnalytics { + pub fn new(synonyms: Option<&BTreeMap>>) -> Self { + Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { synonyms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct EmbeddersAnalytics { + // last + pub total: Option, + // Merge the sources + pub sources: Option>, + // |= + pub document_template_used: Option, + // max + pub document_template_max_bytes: Option, + // |= + pub binary_quantization_used: Option, +} + +impl EmbeddersAnalytics { + pub fn new(setting: Option<&BTreeMap>>) -> Self { + let mut sources = std::collections::HashSet::new(); + + if let Some(s) = &setting { + for source in s + .values() + .filter_map(|config| 
config.clone().set()) + .filter_map(|config| config.source.set()) + { + use meilisearch_types::milli::vector::settings::EmbedderSource; + match source { + EmbedderSource::OpenAi => sources.insert("openAi".to_string()), + EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()), + EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), + EmbedderSource::Ollama => sources.insert("ollama".to_string()), + EmbedderSource::Rest => sources.insert("rest".to_string()), + }; + } + }; + + Self { + total: setting.as_ref().map(|s| s.len()), + sources: Some(sources), + document_template_used: setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.document_template.set().is_some()) + }), + document_template_max_bytes: setting.as_ref().and_then(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.document_template_max_bytes.set()) + .max() + }), + binary_quantization_used: setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.binary_quantized.set().is_some()) + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { embedders: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +pub struct SearchCutoffMsAnalytics { + pub search_cutoff_ms: Option, +} + +impl SearchCutoffMsAnalytics { + pub fn new(setting: Option<&u64>) -> Self { + Self { search_cutoff_ms: setting.copied() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +pub struct LocalesAnalytics { + pub locales: Option>, +} + +impl LocalesAnalytics { + pub fn new(rules: Option<&Vec>) -> Self { + LocalesAnalytics { + locales: rules.as_ref().map(|rules| { + rules + .iter() + .flat_map(|rule| rule.locales.iter().cloned()) + 
.collect::>() + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { locales: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DictionaryAnalytics { + pub total: Option, +} + +impl DictionaryAnalytics { + pub fn new(dictionary: Option<&BTreeSet>) -> Self { + Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { dictionary: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SeparatorTokensAnalytics { + pub total: Option, +} + +impl SeparatorTokensAnalytics { + pub fn new(separator_tokens: Option<&BTreeSet>) -> Self { + Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { separator_tokens: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct NonSeparatorTokensAnalytics { + pub total: Option, +} + +impl NonSeparatorTokensAnalytics { + pub fn new(non_separator_tokens: Option<&BTreeSet>) -> Self { + Self { + total: non_separator_tokens + .as_ref() + .map(|non_separator_tokens| non_separator_tokens.len()), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { non_separator_tokens: self, ..Default::default() } + } +} From 18ac4032aa5512c96b0068d0603f4db285f81bd9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:35:11 +0200 Subject: [PATCH 63/92] Remove the experimental feature seen --- meilisearch/src/routes/features.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 1de00717d..8bdb3ffb3 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -17,24 +17,19 @@ use crate::extractors::sequential_extractor::SeqHandler; pub fn configure(cfg: &mut web::ServiceConfig) { 
cfg.service( web::resource("") - .route(web::get().to(SeqHandler(get_features))) + .route(web::get().to(get_features)) .route(web::patch().to(SeqHandler(patch_features))), ); } -crate::empty_analytics!(GetExperimentalFeatureAnalytics, "Experimental features Seen"); - async fn get_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>, Data, >, - req: HttpRequest, - analytics: Data, ) -> HttpResponse { let features = index_scheduler.features(); - analytics.publish(GetExperimentalFeatureAnalytics::default(), &req); let features = features.runtime_features(); debug!(returns = ?features, "Get features"); HttpResponse::Ok().json(features) From 1ab6fec9030351956fd2462dc5afb3b2b317860c Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:49:21 +0200 Subject: [PATCH 64/92] send all experimental features in the info event including the runtime one --- .../src/analytics/segment_analytics.rs | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 1edfa1bdd..c0c2b64d8 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -10,6 +10,7 @@ use actix_web::HttpRequest; use byte_unit::Byte; use index_scheduler::IndexScheduler; use meilisearch_auth::{AuthController, AuthFilter}; +use meilisearch_types::features::RuntimeTogglableFeatures; use meilisearch_types::locales::Locale; use meilisearch_types::InstanceUid; use once_cell::sync::Lazy; @@ -173,7 +174,9 @@ impl SegmentAnalytics { struct Infos { env: String, experimental_contains_filter: bool, + experimental_vector_store: bool, experimental_enable_metrics: bool, + experimental_edit_documents_by_function: bool, experimental_search_queue_size: usize, experimental_drop_search_after: usize, experimental_nb_searches_per_core: usize, @@ -210,8 +213,8 @@ struct Infos { ssl_tickets: bool, } -impl From 
for Infos { - fn from(options: Opt) -> Self { +impl Infos { + pub fn new(options: Opt, features: RuntimeTogglableFeatures) -> Self { // We wants to decompose this whole struct by hand to be sure we don't forget // to add analytics when we add a field in the Opt. // Thus we must not insert `..` at the end. @@ -254,8 +257,7 @@ impl From for Infos { log_level, indexer_options, config_file_path, - #[cfg(feature = "analytics")] - no_analytics: _, + no_analytics: _, } = options; let schedule_snapshot = match schedule_snapshot { @@ -266,18 +268,28 @@ impl From for Infos { let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = indexer_options; + let RuntimeTogglableFeatures { + vector_store, + metrics, + logs_route, + edit_documents_by_function, + contains_filter, + } = features; + // We're going to override every sensible information. // We consider information sensible if it contains a path, an address, or a key. Self { env, - experimental_contains_filter, - experimental_enable_metrics, + experimental_contains_filter: experimental_contains_filter | contains_filter, + experimental_vector_store: vector_store, + experimental_edit_documents_by_function: edit_documents_by_function, + experimental_enable_metrics: experimental_enable_metrics | metrics, experimental_search_queue_size, experimental_drop_search_after: experimental_drop_search_after.into(), experimental_nb_searches_per_core: experimental_nb_searches_per_core.into(), experimental_logs_mode, experimental_replication_parameters, - experimental_enable_logs_route, + experimental_enable_logs_route: experimental_enable_logs_route | logs_route, experimental_reduce_indexing_memory_usage, gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), db_path: db_path != PathBuf::from("./data.ms"), @@ -319,7 +331,7 @@ pub struct Segment { } impl Segment { - fn compute_traits(opt: &Opt, stats: Stats) -> Value { + fn compute_traits(opt: &Opt, stats: Stats, features: RuntimeTogglableFeatures) -> 
Value { static FIRST_START_TIMESTAMP: Lazy = Lazy::new(Instant::now); static SYSTEM: Lazy = Lazy::new(|| { let disks = Disks::new_with_refreshed_list(); @@ -347,7 +359,7 @@ impl Segment { "indexes_number": stats.indexes.len(), "documents_number": number_of_documents, }, - "infos": Infos::from(opt.clone()), + "infos": Infos::new(opt.clone(), features), }) } @@ -399,9 +411,11 @@ impl Segment { index_scheduler: Arc, auth_controller: Arc, ) { - if let Ok(stats) = - create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default()) - { + if let Ok(stats) = create_all_stats( + index_scheduler.clone().into(), + auth_controller.into(), + &AuthFilter::default(), + ) { // Replace the version number with the prototype name if any. let version = if let Some(prototype) = build_info::DescribeResult::from_build() .and_then(|describe| describe.as_prototype()) @@ -420,7 +434,11 @@ impl Segment { }, })), user: self.user.clone(), - traits: Self::compute_traits(&self.opt, stats), + traits: Self::compute_traits( + &self.opt, + stats, + index_scheduler.features().runtime_features(), + ), ..Default::default() }) .await; From fa1db6b7216fce5e9727dfacbcdccc770ef80f16 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:55:30 +0200 Subject: [PATCH 65/92] fix the tests --- meilisearch/src/analytics/mod.rs | 4 ++++ meilisearch/tests/common/service.rs | 5 +++-- meilisearch/tests/logs/mod.rs | 5 +++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index d08f3307c..75e8083c5 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -158,6 +158,10 @@ impl Analytics { } } + pub fn no_analytics() -> Self { + Self { segment: None } + } + pub fn instance_uid(&self) -> Option<&InstanceUid> { self.segment.as_ref().map(|segment| segment.instance_uid.as_ref()) } diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index 
8addbacf8..c0b07c217 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -9,8 +9,9 @@ use actix_web::test; use actix_web::test::TestRequest; use actix_web::web::Data; use index_scheduler::IndexScheduler; +use meilisearch::analytics::Analytics; use meilisearch::search_queue::SearchQueue; -use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; +use meilisearch::{create_app, Opt, SubscriberForSecondLayer}; use meilisearch_auth::AuthController; use tracing::level_filters::LevelFilter; use tracing_subscriber::Layer; @@ -141,7 +142,7 @@ impl Service { Data::new(search_queue), self.options.clone(), (route_layer_handle, stderr_layer_handle), - analytics::MockAnalytics::new(&self.options), + Data::new(Analytics::no_analytics()), true, )) .await diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs index 9f4649dca..26482b561 100644 --- a/meilisearch/tests/logs/mod.rs +++ b/meilisearch/tests/logs/mod.rs @@ -7,8 +7,9 @@ use std::str::FromStr; use actix_web::http::header::ContentType; use actix_web::web::Data; use meili_snap::snapshot; +use meilisearch::analytics::Analytics; use meilisearch::search_queue::SearchQueue; -use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; +use meilisearch::{create_app, Opt, SubscriberForSecondLayer}; use tracing::level_filters::LevelFilter; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::Layer; @@ -54,7 +55,7 @@ async fn basic_test_log_stream_route() { Data::new(search_queue), server.service.options.clone(), (route_layer_handle, stderr_layer_handle), - analytics::MockAnalytics::new(&server.service.options), + Data::new(Analytics::no_analytics()), true, )) .await; From 3a7a20c7162b728a99327eb32b012f6651e7186b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 11:14:33 +0200 Subject: [PATCH 66/92] remove the segment feature and always import segment --- meilisearch/Cargo.toml | 5 ++--- meilisearch/src/analytics/mod.rs 
| 21 +++++++++++++++---- .../src/analytics/segment_analytics.rs | 1 - meilisearch/src/option.rs | 9 +------- meilisearch/tests/common/server.rs | 1 - 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 07357e724..57202f59f 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -75,7 +75,7 @@ reqwest = { version = "0.12.5", features = [ rustls = { version = "0.23.11", features = ["ring"], default-features = false } rustls-pki-types = { version = "1.7.0", features = ["alloc"] } rustls-pemfile = "2.1.2" -segment = { version = "0.2.4", optional = true } +segment = { version = "0.2.4" } serde = { version = "1.0.204", features = ["derive"] } serde_json = { version = "1.0.120", features = ["preserve_order"] } sha2 = "0.10.8" @@ -132,8 +132,7 @@ tempfile = { version = "3.10.1", optional = true } zip = { version = "2.1.3", optional = true } [features] -default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] -analytics = ["segment"] +default = ["meilisearch-types/all-tokenizations", "mini-dashboard"] mini-dashboard = [ "static-files", "anyhow", diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 75e8083c5..67b830204 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -1,5 +1,3 @@ -#![allow(clippy::transmute_ptr_to_ref)] // mopify isn't updated with the latest version of clippy yet - pub mod segment_analytics; use std::fs; @@ -85,13 +83,19 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } +/// To send an event to segment, your event must be able to aggregate itself with another event of the same type. pub trait Aggregate: 'static + mopa::Any + Send { + /// The name of the event that will be sent to segment. fn event_name(&self) -> &'static str; + /// Will be called every time an event has been used twice before segment flushed its buffer. 
fn aggregate(self: Box, other: Box) -> Box where Self: Sized; + /// An internal helper function, you shouldn't implement it yourself. + /// This function should always be called on the same type. If `this` and `other` + /// aren't the same type behind the function will do nothing and return `None`. fn downcast_aggregate( this: Box, other: Box, @@ -100,6 +104,7 @@ pub trait Aggregate: 'static + mopa::Any + Send { Self: Sized, { if this.is::() && other.is::() { + // Both the two following lines cannot fail, but just to be sure we don't crash, we're still avoiding unwrapping let this = this.downcast::().ok()?; let other = other.downcast::().ok()?; Some(Self::aggregate(this, other)) @@ -108,18 +113,26 @@ pub trait Aggregate: 'static + mopa::Any + Send { } } + /// Converts your structure to the final event that'll be sent to segment. fn into_event(self: Box) -> serde_json::Value; } mopafy!(Aggregate); -/// Helper trait to define multiple aggregate with the same content but a different name. -/// Commonly used when you must aggregate a search with POST or with GET for example. +/// Helper trait to define multiple aggregates with the same content but a different name. +/// Commonly used when you must aggregate a search with POST or with GET, for example. pub trait AggregateMethod: 'static + Default + Send { fn event_name() -> &'static str; } /// A macro used to quickly define multiple aggregate method with their name +/// Usage: +/// ```rust +/// aggregate_methods!( +/// SearchGET => "Documents Searched GET", +/// SearchPOST => "Documents Searched POST", +/// ); +/// ``` #[macro_export] macro_rules! 
aggregate_methods { ($method:ident => $event_name:literal) => { diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index c0c2b64d8..10927f49b 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -695,7 +695,6 @@ impl SearchAggregator { aggregate_methods!( SearchGET => "Documents Searched GET", SearchPOST => "Documents Searched POST", - ); impl Aggregate for SearchAggregator { diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 02dc660a4..7e87a5a2c 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -29,7 +29,6 @@ const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY"; const MEILI_ENV: &str = "MEILI_ENV"; const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL"; const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER"; -#[cfg(feature = "analytics")] const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS"; const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT"; const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH"; @@ -210,7 +209,6 @@ pub struct Opt { /// Meilisearch automatically collects data from all instances that do not opt out using this flag. /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted /// at any time. - #[cfg(feature = "analytics")] #[serde(default)] // we can't send true #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, @@ -425,7 +423,6 @@ pub struct Opt { impl Opt { /// Whether analytics should be enabled or not. 
- #[cfg(all(not(debug_assertions), feature = "analytics"))] pub fn analytics(&self) -> bool { !self.no_analytics } @@ -505,7 +502,6 @@ impl Opt { ignore_missing_dump: _, ignore_dump_if_db_exists: _, config_file_path: _, - #[cfg(feature = "analytics")] no_analytics, experimental_contains_filter, experimental_enable_metrics, @@ -533,10 +529,7 @@ impl Opt { ); } - #[cfg(feature = "analytics")] - { - export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); - } + export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); export_to_env_if_not_present( MEILI_HTTP_PAYLOAD_SIZE_LIMIT, http_payload_size_limit.to_string(), diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index 6d331ebbc..92f181398 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -381,7 +381,6 @@ pub fn default_settings(dir: impl AsRef) -> Opt { db_path: dir.as_ref().join("db"), dump_dir: dir.as_ref().join("dumps"), env: "development".to_owned(), - #[cfg(feature = "analytics")] no_analytics: true, max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(), max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(), From 89e2d2b2b9b83a44e2a2af8e2d13020be72c1260 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 13:55:49 +0200 Subject: [PATCH 67/92] fix the doctest --- meilisearch/src/analytics/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 67b830204..48ac13fc0 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -128,6 +128,8 @@ pub trait AggregateMethod: 'static + Default + Send { /// A macro used to quickly define multiple aggregate method with their name /// Usage: /// ```rust +/// use meilisearch::aggregate_methods; +/// /// aggregate_methods!( /// SearchGET => "Documents Searched GET", /// SearchPOST => "Documents Searched POST", From 
e51e6f902a13525610c4d0a81125c7292da3de36 Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:42:02 +0300 Subject: [PATCH 68/92] Highlight partially cropped matches too --- milli/src/search/new/matches/match.rs | 2 +- .../src/search/new/matches/matching_words.rs | 25 +++-- milli/src/search/new/matches/mod.rs | 94 ++++++++++--------- 3 files changed, 67 insertions(+), 54 deletions(-) diff --git a/milli/src/search/new/matches/match.rs b/milli/src/search/new/matches/match.rs index cc08b006c..2eef4d5a6 100644 --- a/milli/src/search/new/matches/match.rs +++ b/milli/src/search/new/matches/match.rs @@ -18,7 +18,7 @@ pub enum MatchPosition { #[derive(Clone, Debug)] pub struct Match { - pub match_len: usize, + pub char_count: usize, // ids of the query words that matches. pub ids: Vec, pub position: MatchPosition, diff --git a/milli/src/search/new/matches/matching_words.rs b/milli/src/search/new/matches/matching_words.rs index e4d2785ca..1f30a17ad 100644 --- a/milli/src/search/new/matches/matching_words.rs +++ b/milli/src/search/new/matches/matching_words.rs @@ -86,14 +86,17 @@ impl MatchingWords { continue; }; let prefix_length = char_index + c.len_utf8(); - let char_len = token.original_lengths(prefix_length).0; + let (char_count, byte_len) = token.original_lengths(prefix_length); let ids = &located_words.positions; - return Some(MatchType::Full { char_len, ids }); + return Some(MatchType::Full { ids, char_count, byte_len }); // else we exact match the token. } else if token.lemma() == word { - let char_len = token.char_end - token.char_start; let ids = &located_words.positions; - return Some(MatchType::Full { char_len, ids }); + return Some(MatchType::Full { + char_count: token.char_end - token.char_start, + byte_len: token.byte_end - token.byte_start, + ids, + }); } } } @@ -149,7 +152,7 @@ pub type WordId = u16; /// In these cases we need to match consecutively several tokens to consider that the match is full. 
#[derive(Debug, PartialEq)] pub enum MatchType<'a> { - Full { char_len: usize, ids: &'a RangeInclusive }, + Full { char_count: usize, byte_len: usize, ids: &'a RangeInclusive }, Partial(PartialMatch<'a>), } @@ -183,7 +186,11 @@ impl<'a> PartialMatch<'a> { // if there is no remaining word to match in the phrase and the current token is matching, // return a Full match. } else if is_matching { - Some(MatchType::Full { char_len: token.char_end - token.char_start, ids }) + Some(MatchType::Full { + char_count: token.char_end - token.char_start, + byte_len: token.byte_end - token.byte_start, + ids, + }) // if the current token doesn't match, return None to break the match sequence. } else { None @@ -270,7 +277,7 @@ pub(crate) mod tests { ..Default::default() }) .next(), - Some(MatchType::Full { char_len: 5, ids: &(0..=0) }) + Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(0..=0) }) ); assert_eq!( matching_words @@ -294,7 +301,7 @@ pub(crate) mod tests { ..Default::default() }) .next(), - Some(MatchType::Full { char_len: 5, ids: &(2..=2) }) + Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(2..=2) }) ); assert_eq!( matching_words @@ -306,7 +313,7 @@ pub(crate) mod tests { ..Default::default() }) .next(), - Some(MatchType::Full { char_len: 5, ids: &(2..=2) }) + Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(2..=2) }) ); assert_eq!( matching_words diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index ac0fb7e7b..80e3ec7b2 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -10,7 +10,10 @@ use matching_words::{MatchType, PartialMatch}; use r#match::{Match, MatchPosition}; use serde::Serialize; use simple_token_kind::SimpleTokenKind; -use std::borrow::Cow; +use std::{ + borrow::Cow, + cmp::{max, min}, +}; const DEFAULT_CROP_MARKER: &str = "…"; const DEFAULT_HIGHLIGHT_PREFIX: &str = ""; @@ -139,7 +142,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { 
Some(MatchType::Full { ids, .. }) => { // save the token that closes the partial match as a match. matches.push(Match { - match_len: word.char_end - *first_word_char_start, + char_count: word.char_end - *first_word_char_start, ids: ids.clone().collect(), position: MatchPosition::Phrase { word_positions: [first_word_position, word_position], @@ -182,10 +185,10 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { match match_type { // we match, we save the current token as a match, // then we continue the rest of the tokens. - MatchType::Full { char_len, ids } => { + MatchType::Full { ids, char_count, .. } => { let ids: Vec<_> = ids.clone().collect(); matches.push(Match { - match_len: char_len, + char_count, ids, position: MatchPosition::Word { word_position, token_position }, }); @@ -224,19 +227,15 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { .iter() .map(|m| MatchBounds { start: tokens[m.get_first_token_pos()].byte_start, - length: m.match_len, + // TODO: Why is this in chars, while start is in bytes? + length: m.char_count, }) .collect(), } } /// Returns the bounds in byte index of the crop window. - fn crop_bounds( - &self, - tokens: &[Token<'_>], - matches: &[Match], - crop_size: usize, - ) -> (usize, usize) { + fn crop_bounds(&self, tokens: &[Token<'_>], matches: &[Match], crop_size: usize) -> [usize; 2] { let ( mut remaining_words, is_iterating_forward, @@ -371,7 +370,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end); let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start); - (crop_byte_start, crop_byte_end) + [crop_byte_start, crop_byte_end] } // Returns the formatted version of the original text. @@ -382,78 +381,87 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { } else { match &self.matches { Some((tokens, matches)) => { - // If the text has to be cropped, - // crop around the best interval. 
- let (byte_start, byte_end) = match format_options.crop { + // If the text has to be cropped, crop around the best interval. + let [crop_byte_start, crop_byte_end] = match format_options.crop { Some(crop_size) if crop_size > 0 => { self.crop_bounds(tokens, matches, crop_size) } - _ => (0, self.text.len()), + _ => [0, self.text.len()], }; let mut formatted = Vec::new(); // push crop marker if it's not the start of the text. - if byte_start > 0 && !self.crop_marker.is_empty() { + if crop_byte_start > 0 && !self.crop_marker.is_empty() { formatted.push(self.crop_marker); } - let mut byte_index = byte_start; + let mut byte_index = crop_byte_start; if format_options.highlight { // insert highlight markers around matches. for m in matches { - let (current_byte_start, current_byte_end) = match m.position { + let [m_byte_start, m_byte_end] = match m.position { MatchPosition::Word { token_position, .. } => { let token = &tokens[token_position]; - (&token.byte_start, &token.byte_end) + [&token.byte_start, &token.byte_end] } MatchPosition::Phrase { token_positions: [ftp, ltp], .. } => { - (&tokens[ftp].byte_start, &tokens[ltp].byte_end) + [&tokens[ftp].byte_start, &tokens[ltp].byte_end] } }; - // skip matches out of the crop window. - if *current_byte_start < byte_start || *current_byte_end > byte_end { + // skip matches out of the crop window + if *m_byte_end < crop_byte_start || *m_byte_start > crop_byte_end { continue; } - if byte_index < *current_byte_start { - formatted.push(&self.text[byte_index..*current_byte_start]); + // adjust start and end to the crop window size + let [m_byte_start, m_byte_end] = [ + max(m_byte_start, &crop_byte_start), + min(m_byte_end, &crop_byte_end), + ]; + + // push text that is positioned before our matches + if byte_index < *m_byte_start { + formatted.push(&self.text[byte_index..*m_byte_start]); } - let highlight_byte_index = self.text[*current_byte_start..] 
- .char_indices() - .enumerate() - .find(|(i, _)| *i == m.match_len) - .map_or(*current_byte_end, |(_, (i, _))| i + *current_byte_start); - formatted.push(self.highlight_prefix); - formatted.push(&self.text[*current_byte_start..highlight_byte_index]); + + // TODO: This is additional work done, charabia::token::Token byte_len + // should already get us the original byte length, however, that doesn't work as + // it's supposed to, investigate why + let highlight_byte_index = self.text[*m_byte_start..] + .char_indices() + .nth(m.char_count) + .map_or(*m_byte_end, |(i, _)| min(i + *m_byte_start, *m_byte_end)); + formatted.push(&self.text[*m_byte_start..highlight_byte_index]); + formatted.push(self.highlight_suffix); // if it's a prefix highlight, we put the end of the word after the highlight marker. - if highlight_byte_index < *current_byte_end { - formatted.push(&self.text[highlight_byte_index..*current_byte_end]); + if highlight_byte_index < *m_byte_end { + formatted.push(&self.text[highlight_byte_index..*m_byte_end]); } - byte_index = *current_byte_end; + byte_index = *m_byte_end; } } // push the rest of the text between last match and the end of crop. - if byte_index < byte_end { - formatted.push(&self.text[byte_index..byte_end]); + if byte_index < crop_byte_end { + formatted.push(&self.text[byte_index..crop_byte_end]); } // push crop marker if it's not the end of the text. - if byte_end < self.text.len() && !self.crop_marker.is_empty() { + if crop_byte_end < self.text.len() && !self.crop_marker.is_empty() { formatted.push(self.crop_marker); } if formatted.len() == 1 { // avoid concatenating if there is already 1 slice. 
- Cow::Borrowed(&self.text[byte_start..byte_end]) + Cow::Borrowed(&self.text[crop_byte_start..crop_byte_end]) } else { Cow::Owned(formatted.concat()) } @@ -825,8 +833,7 @@ mod tests { let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), - // @TODO: Should probably highlight it all, even if it didn't fit the whole phrase - @"The groundbreaking invention had the power to split the world…" + @"The groundbreaking invention had the power to split the world…" ); let builder = MatcherBuilder::new_test( @@ -837,7 +844,7 @@ mod tests { let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), - // @TODO: Should probably include end of string in this case? + // TODO: Should include exclamation mark without crop markers @"…between those who embraced progress and those who resisted change…" ); @@ -860,8 +867,7 @@ mod tests { let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), - // @TODO: "invention" should be highlighted as well - @"…invention had the power to split the world between those…" + @"…invention had the power to split the world between those…" ); } From c94679bde6993f91418e4113852ce9c667a198f8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:24:12 +0200 Subject: [PATCH 69/92] apply review comments --- meilisearch/src/routes/indexes/documents.rs | 56 +++++++++++++-------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 854fa5b69..60014bae4 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -107,11 +107,8 @@ aggregate_methods!( DocumentsPOST => "Documents Fetched POST", ); -#[derive(Default, Serialize)] +#[derive(Serialize)] pub struct DocumentsFetchAggregator { - #[serde(rename = "requests.total_received")] - total_received: usize, - // a call on 
../documents/:doc_id per_document_id: bool, // if a filter was used @@ -145,7 +142,6 @@ impl DocumentsFetchAggregator { }; Self { - total_received: 1, per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), max_limit: limit, @@ -164,7 +160,6 @@ impl Aggregate for DocumentsFetchAggregator { fn aggregate(self: Box, other: Box) -> Box { Box::new(Self { - total_received: self.total_received.saturating_add(other.total_received), per_document_id: self.per_document_id | other.per_document_id, per_filter: self.per_filter | other.per_filter, retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors, @@ -199,7 +194,11 @@ pub async fn get_document( analytics.publish( DocumentsFetchAggregator:: { retrieve_vectors: param_retrieve_vectors.0, - ..Default::default() + per_document_id: true, + per_filter: false, + max_limit: 0, + max_offset: 0, + marker: PhantomData, }, &req, ); @@ -211,10 +210,8 @@ pub async fn get_document( Ok(HttpResponse::Ok().json(document)) } -#[derive(Default, Serialize)] +#[derive(Serialize)] pub struct DocumentsDeletionAggregator { - #[serde(rename = "requests.total_received")] - total_received: usize, per_document_id: bool, clear_all: bool, per_batch: bool, @@ -228,7 +225,6 @@ impl Aggregate for DocumentsDeletionAggregator { fn aggregate(self: Box, other: Box) -> Box { Box::new(Self { - total_received: self.total_received.saturating_add(other.total_received), per_document_id: self.per_document_id | other.per_document_id, clear_all: self.clear_all | other.clear_all, per_batch: self.per_batch | other.per_batch, @@ -253,9 +249,10 @@ pub async fn delete_document( analytics.publish( DocumentsDeletionAggregator { - total_received: 1, per_document_id: true, - ..Default::default() + clear_all: false, + per_batch: false, + per_filter: false, }, &req, ); @@ -316,12 +313,12 @@ pub async fn documents_by_query_post( analytics.publish( 
DocumentsFetchAggregator:: { - total_received: 1, per_filter: body.filter.is_some(), retrieve_vectors: body.retrieve_vectors, max_limit: body.limit, max_offset: body.offset, - ..Default::default() + per_document_id: false, + marker: PhantomData, }, &req, ); @@ -358,12 +355,12 @@ pub async fn get_documents( analytics.publish( DocumentsFetchAggregator:: { - total_received: 1, per_filter: query.filter.is_some(), retrieve_vectors: query.retrieve_vectors, max_limit: query.limit, max_offset: query.offset, - ..Default::default() + per_document_id: false, + marker: PhantomData, }, &req, ); @@ -426,7 +423,7 @@ aggregate_methods!( Updated => "Documents Updated", ); -#[derive(Default, Serialize)] +#[derive(Serialize)] pub struct DocumentsAggregator { payload_types: HashSet, primary_key: HashSet, @@ -718,7 +715,12 @@ pub async fn delete_documents_batch( let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.publish( - DocumentsDeletionAggregator { total_received: 1, per_batch: true, ..Default::default() }, + DocumentsDeletionAggregator { + per_batch: true, + per_document_id: false, + clear_all: false, + per_filter: false, + }, &req, ); @@ -761,7 +763,12 @@ pub async fn delete_documents_by_filter( let filter = body.into_inner().filter; analytics.publish( - DocumentsDeletionAggregator { total_received: 1, per_filter: true, ..Default::default() }, + DocumentsDeletionAggregator { + per_filter: true, + per_document_id: false, + clear_all: false, + per_batch: false, + }, &req, ); @@ -793,7 +800,7 @@ pub struct DocumentEditionByFunction { pub function: String, } -#[derive(Default, Serialize)] +#[derive(Serialize)] struct EditDocumentsByFunctionAggregator { // Set to true if at least one request was filtered filtered: bool, @@ -899,7 +906,12 @@ pub async fn clear_all_documents( ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.publish( - DocumentsDeletionAggregator { total_received: 1, clear_all: true, ..Default::default() }, + 
DocumentsDeletionAggregator { + clear_all: true, + per_document_id: false, + per_batch: false, + per_filter: false, + }, &req, ); From 73b57228967dffe4a3da7214f2f6bc3ebb15cf5c Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:31:21 +0200 Subject: [PATCH 70/92] rename the other parameter of the aggregate method to new to avoid confusion --- meilisearch/src/analytics/mod.rs | 12 +-- .../src/analytics/segment_analytics.rs | 26 +++--- meilisearch/src/routes/features.rs | 12 +-- meilisearch/src/routes/indexes/documents.rs | 38 ++++---- .../src/routes/indexes/facet_search.rs | 12 +-- meilisearch/src/routes/indexes/mod.rs | 12 +-- .../src/routes/indexes/settings_analytics.rs | 86 +++++++++---------- meilisearch/src/routes/swap_indexes.rs | 4 +- meilisearch/src/routes/tasks.rs | 24 +++--- 9 files changed, 108 insertions(+), 118 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 48ac13fc0..27203ea71 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -89,7 +89,7 @@ pub trait Aggregate: 'static + mopa::Any + Send { fn event_name(&self) -> &'static str; /// Will be called every time an event has been used twice before segment flushed its buffer. - fn aggregate(self: Box, other: Box) -> Box + fn aggregate(self: Box, new: Box) -> Box where Self: Sized; @@ -97,16 +97,16 @@ pub trait Aggregate: 'static + mopa::Any + Send { /// This function should always be called on the same type. If `this` and `other` /// aren't the same type behind the function will do nothing and return `None`. 
fn downcast_aggregate( - this: Box, - other: Box, + old: Box, + new: Box, ) -> Option> where Self: Sized, { - if this.is::() && other.is::() { + if old.is::() && new.is::() { // Both the two following lines cannot fail, but just to be sure we don't crash, we're still avoiding unwrapping - let this = this.downcast::().ok()?; - let other = other.downcast::().ok()?; + let this = old.downcast::().ok()?; + let other = new.downcast::().ok()?; Some(Self::aggregate(this, other)) } else { None diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 10927f49b..328a3a048 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -702,7 +702,7 @@ impl Aggregate for SearchAggregator { Method::event_name() } - fn aggregate(mut self: Box, other: Box) -> Box { + fn aggregate(mut self: Box, new: Box) -> Box { let Self { total_received, total_succeeded, @@ -743,7 +743,7 @@ impl Aggregate for SearchAggregator { ranking_score_threshold, mut locales, marker: _, - } = *other; + } = *new; // request self.total_received = self.total_received.saturating_add(total_received); @@ -1038,22 +1038,22 @@ impl Aggregate for MultiSearchAggregator { } /// Aggregate one [MultiSearchAggregator] into another. - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { // write the aggregate in a way that will cause a compilation error if a field is added. // get ownership of self, replacing it by a default value. 
let this = *self; - let total_received = this.total_received.saturating_add(other.total_received); - let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded); + let total_received = this.total_received.saturating_add(new.total_received); + let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); let total_distinct_index_count = - this.total_distinct_index_count.saturating_add(other.total_distinct_index_count); - let total_single_index = this.total_single_index.saturating_add(other.total_single_index); - let total_search_count = this.total_search_count.saturating_add(other.total_search_count); - let show_ranking_score = this.show_ranking_score || other.show_ranking_score; + this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); + let total_single_index = this.total_single_index.saturating_add(new.total_single_index); + let total_search_count = this.total_search_count.saturating_add(new.total_search_count); + let show_ranking_score = this.show_ranking_score || new.show_ranking_score; let show_ranking_score_details = - this.show_ranking_score_details || other.show_ranking_score_details; - let use_federation = this.use_federation || other.use_federation; + this.show_ranking_score_details || new.show_ranking_score_details; + let use_federation = this.use_federation || new.use_federation; Box::new(Self { total_received, @@ -1215,7 +1215,7 @@ impl Aggregate for SimilarAggregator { } /// Aggregate one [SimilarAggregator] into another. 
- fn aggregate(mut self: Box, other: Box) -> Box { + fn aggregate(mut self: Box, new: Box) -> Box { let Self { total_received, total_succeeded, @@ -1233,7 +1233,7 @@ impl Aggregate for SimilarAggregator { ranking_score_threshold, retrieve_vectors, marker: _, - } = *other; + } = *new; // request self.total_received = self.total_received.saturating_add(total_received); diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 8bdb3ffb3..5d93adc02 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -64,13 +64,13 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { "Experimental features Updated" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - vector_store: other.vector_store, - metrics: other.metrics, - logs_route: other.logs_route, - edit_documents_by_function: other.edit_documents_by_function, - contains_filter: other.contains_filter, + vector_store: new.vector_store, + metrics: new.metrics, + logs_route: new.logs_route, + edit_documents_by_function: new.edit_documents_by_function, + contains_filter: new.contains_filter, }) } diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 60014bae4..47f73ef42 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -158,13 +158,13 @@ impl Aggregate for DocumentsFetchAggregator { Method::event_name() } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - per_document_id: self.per_document_id | other.per_document_id, - per_filter: self.per_filter | other.per_filter, - retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors, - max_limit: self.max_limit.max(other.max_limit), - max_offset: self.max_offset.max(other.max_offset), + per_document_id: self.per_document_id | new.per_document_id, + per_filter: self.per_filter | 
new.per_filter, + retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors, + max_limit: self.max_limit.max(new.max_limit), + max_offset: self.max_offset.max(new.max_offset), marker: PhantomData, }) } @@ -223,12 +223,12 @@ impl Aggregate for DocumentsDeletionAggregator { "Documents Deleted" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - per_document_id: self.per_document_id | other.per_document_id, - clear_all: self.clear_all | other.clear_all, - per_batch: self.per_batch | other.per_batch, - per_filter: self.per_filter | other.per_filter, + per_document_id: self.per_document_id | new.per_document_id, + clear_all: self.clear_all | new.clear_all, + per_batch: self.per_batch | new.per_batch, + per_filter: self.per_filter | new.per_filter, }) } @@ -437,11 +437,11 @@ impl Aggregate for DocumentsAggregator { Method::event_name() } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - payload_types: self.payload_types.union(&other.payload_types).cloned().collect(), - primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), - index_creation: self.index_creation | other.index_creation, + payload_types: self.payload_types.union(&new.payload_types).cloned().collect(), + primary_key: self.primary_key.union(&new.primary_key).cloned().collect(), + index_creation: self.index_creation | new.index_creation, method: PhantomData, }) } @@ -815,11 +815,11 @@ impl Aggregate for EditDocumentsByFunctionAggregator { "Documents Edited By Function" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - filtered: self.filtered | other.filtered, - with_context: self.with_context | other.with_context, - index_creation: self.index_creation | other.index_creation, + filtered: self.filtered | new.filtered, + with_context: self.with_context | new.with_context, + index_creation: self.index_creation 
| new.index_creation, }) } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 8e40397c7..99a4a4f28 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -113,18 +113,18 @@ impl Aggregate for FacetSearchAggregator { "Facet Searched POST" } - fn aggregate(mut self: Box, other: Box) -> Box { - for time in other.time_spent { + fn aggregate(mut self: Box, new: Box) -> Box { + for time in new.time_spent { self.time_spent.push(time); } Box::new(Self { - total_received: self.total_received.saturating_add(other.total_received), - total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded), + total_received: self.total_received.saturating_add(new.total_received), + total_succeeded: self.total_succeeded.saturating_add(new.total_succeeded), time_spent: self.time_spent, - facet_names: self.facet_names.union(&other.facet_names).cloned().collect(), + facet_names: self.facet_names.union(&new.facet_names).cloned().collect(), additional_search_parameters_provided: self.additional_search_parameters_provided - | other.additional_search_parameters_provided, + | new.additional_search_parameters_provided, }) } diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 65c81a57e..c8183186d 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -134,10 +134,8 @@ impl Aggregate for IndexCreatedAggregate { "Index Created" } - fn aggregate(self: Box, other: Box) -> Box { - Box::new(Self { - primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), - }) + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() }) } fn into_event(self: Box) -> serde_json::Value { @@ -225,10 +223,8 @@ impl Aggregate for IndexUpdatedAggregate { "Index Updated" } - fn aggregate(self: Box, other: Box) 
-> Box { - Box::new(Self { - primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), - }) + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() }) } fn into_event(self: Box) -> serde_json::Value { diff --git a/meilisearch/src/routes/indexes/settings_analytics.rs b/meilisearch/src/routes/indexes/settings_analytics.rs index 636ef3c57..e7d44fa20 100644 --- a/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/meilisearch/src/routes/indexes/settings_analytics.rs @@ -42,114 +42,108 @@ impl Aggregate for SettingsAnalytics { "Settings Updated" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { ranking_rules: RankingRulesAnalytics { words_position: self .ranking_rules .words_position - .or(other.ranking_rules.words_position), - typo_position: self - .ranking_rules - .typo_position - .or(other.ranking_rules.typo_position), + .or(new.ranking_rules.words_position), + typo_position: self.ranking_rules.typo_position.or(new.ranking_rules.typo_position), proximity_position: self .ranking_rules .proximity_position - .or(other.ranking_rules.proximity_position), + .or(new.ranking_rules.proximity_position), attribute_position: self .ranking_rules .attribute_position - .or(other.ranking_rules.attribute_position), - sort_position: self - .ranking_rules - .sort_position - .or(other.ranking_rules.sort_position), + .or(new.ranking_rules.attribute_position), + sort_position: self.ranking_rules.sort_position.or(new.ranking_rules.sort_position), exactness_position: self .ranking_rules .exactness_position - .or(other.ranking_rules.exactness_position), - values: self.ranking_rules.values.or(other.ranking_rules.values), + .or(new.ranking_rules.exactness_position), + values: self.ranking_rules.values.or(new.ranking_rules.values), }, searchable_attributes: SearchableAttributesAnalytics { - total: 
self.searchable_attributes.total.or(other.searchable_attributes.total), + total: self.searchable_attributes.total.or(new.searchable_attributes.total), with_wildcard: self .searchable_attributes .with_wildcard - .or(other.searchable_attributes.with_wildcard), + .or(new.searchable_attributes.with_wildcard), }, displayed_attributes: DisplayedAttributesAnalytics { - total: self.displayed_attributes.total.or(other.displayed_attributes.total), + total: self.displayed_attributes.total.or(new.displayed_attributes.total), with_wildcard: self .displayed_attributes .with_wildcard - .or(other.displayed_attributes.with_wildcard), + .or(new.displayed_attributes.with_wildcard), }, sortable_attributes: SortableAttributesAnalytics { - total: self.sortable_attributes.total.or(other.sortable_attributes.total), - has_geo: self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), + total: self.sortable_attributes.total.or(new.sortable_attributes.total), + has_geo: self.sortable_attributes.has_geo.or(new.sortable_attributes.has_geo), }, filterable_attributes: FilterableAttributesAnalytics { - total: self.filterable_attributes.total.or(other.filterable_attributes.total), - has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), + total: self.filterable_attributes.total.or(new.filterable_attributes.total), + has_geo: self.filterable_attributes.has_geo.or(new.filterable_attributes.has_geo), }, distinct_attribute: DistinctAttributeAnalytics { - set: self.distinct_attribute.set | other.distinct_attribute.set, + set: self.distinct_attribute.set | new.distinct_attribute.set, }, proximity_precision: ProximityPrecisionAnalytics { - set: self.proximity_precision.set | other.proximity_precision.set, - value: self.proximity_precision.value.or(other.proximity_precision.value), + set: self.proximity_precision.set | new.proximity_precision.set, + value: self.proximity_precision.value.or(new.proximity_precision.value), }, typo_tolerance: TypoToleranceAnalytics 
{ - enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), + enabled: self.typo_tolerance.enabled.or(new.typo_tolerance.enabled), disable_on_attributes: self .typo_tolerance .disable_on_attributes - .or(other.typo_tolerance.disable_on_attributes), + .or(new.typo_tolerance.disable_on_attributes), disable_on_words: self .typo_tolerance .disable_on_words - .or(other.typo_tolerance.disable_on_words), + .or(new.typo_tolerance.disable_on_words), min_word_size_for_one_typo: self .typo_tolerance .min_word_size_for_one_typo - .or(other.typo_tolerance.min_word_size_for_one_typo), + .or(new.typo_tolerance.min_word_size_for_one_typo), min_word_size_for_two_typos: self .typo_tolerance .min_word_size_for_two_typos - .or(other.typo_tolerance.min_word_size_for_two_typos), + .or(new.typo_tolerance.min_word_size_for_two_typos), }, faceting: FacetingAnalytics { max_values_per_facet: self .faceting .max_values_per_facet - .or(other.faceting.max_values_per_facet), + .or(new.faceting.max_values_per_facet), sort_facet_values_by_star_count: self .faceting .sort_facet_values_by_star_count - .or(other.faceting.sort_facet_values_by_star_count), + .or(new.faceting.sort_facet_values_by_star_count), sort_facet_values_by_total: self .faceting .sort_facet_values_by_total - .or(other.faceting.sort_facet_values_by_total), + .or(new.faceting.sort_facet_values_by_total), }, pagination: PaginationAnalytics { - max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), + max_total_hits: self.pagination.max_total_hits.or(new.pagination.max_total_hits), }, stop_words: StopWordsAnalytics { - total: self.stop_words.total.or(other.stop_words.total), + total: self.stop_words.total.or(new.stop_words.total), }, - synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, + synonyms: SynonymsAnalytics { total: self.synonyms.total.or(new.synonyms.total) }, embedders: EmbeddersAnalytics { - total: self.embedders.total.or(other.embedders.total), - 
sources: match (self.embedders.sources, other.embedders.sources) { + total: self.embedders.total.or(new.embedders.total), + sources: match (self.embedders.sources, new.embedders.sources) { (None, None) => None, (Some(sources), None) | (None, Some(sources)) => Some(sources), (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), }, document_template_used: match ( self.embedders.document_template_used, - other.embedders.document_template_used, + new.embedders.document_template_used, ) { (None, None) => None, (Some(used), None) | (None, Some(used)) => Some(used), @@ -157,7 +151,7 @@ impl Aggregate for SettingsAnalytics { }, document_template_max_bytes: match ( self.embedders.document_template_max_bytes, - other.embedders.document_template_max_bytes, + new.embedders.document_template_max_bytes, ) { (None, None) => None, (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), @@ -165,7 +159,7 @@ impl Aggregate for SettingsAnalytics { }, binary_quantization_used: match ( self.embedders.binary_quantization_used, - other.embedders.binary_quantization_used, + new.embedders.binary_quantization_used, ) { (None, None) => None, (Some(bq), None) | (None, Some(bq)) => Some(bq), @@ -176,17 +170,17 @@ impl Aggregate for SettingsAnalytics { search_cutoff_ms: self .search_cutoff_ms .search_cutoff_ms - .or(other.search_cutoff_ms.search_cutoff_ms), + .or(new.search_cutoff_ms.search_cutoff_ms), }, - locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) }, + locales: LocalesAnalytics { locales: self.locales.locales.or(new.locales.locales) }, dictionary: DictionaryAnalytics { - total: self.dictionary.total.or(other.dictionary.total), + total: self.dictionary.total.or(new.dictionary.total), }, separator_tokens: SeparatorTokensAnalytics { - total: self.separator_tokens.total.or(other.non_separator_tokens.total), + total: self.separator_tokens.total.or(new.non_separator_tokens.total), }, non_separator_tokens: NonSeparatorTokensAnalytics { - 
total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), + total: self.non_separator_tokens.total.or(new.non_separator_tokens.total), }, }) } diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index f7d8f4eff..9b8b67e63 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -39,9 +39,9 @@ impl Aggregate for IndexSwappedAnalytics { "Indexes Swapped" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - swap_operation_number: self.swap_operation_number.max(other.swap_operation_number), + swap_operation_number: self.swap_operation_number.max(new.swap_operation_number), }) } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index ff4aee998..712b8ecde 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -185,25 +185,25 @@ impl Aggregate for TaskFilterAnalytics, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - filtered_by_uid: self.filtered_by_uid | other.filtered_by_uid, - filtered_by_index_uid: self.filtered_by_index_uid | other.filtered_by_index_uid, - filtered_by_type: self.filtered_by_type | other.filtered_by_type, - filtered_by_status: self.filtered_by_status | other.filtered_by_status, - filtered_by_canceled_by: self.filtered_by_canceled_by | other.filtered_by_canceled_by, + filtered_by_uid: self.filtered_by_uid | new.filtered_by_uid, + filtered_by_index_uid: self.filtered_by_index_uid | new.filtered_by_index_uid, + filtered_by_type: self.filtered_by_type | new.filtered_by_type, + filtered_by_status: self.filtered_by_status | new.filtered_by_status, + filtered_by_canceled_by: self.filtered_by_canceled_by | new.filtered_by_canceled_by, filtered_by_before_enqueued_at: self.filtered_by_before_enqueued_at - | other.filtered_by_before_enqueued_at, + | new.filtered_by_before_enqueued_at, 
filtered_by_after_enqueued_at: self.filtered_by_after_enqueued_at - | other.filtered_by_after_enqueued_at, + | new.filtered_by_after_enqueued_at, filtered_by_before_started_at: self.filtered_by_before_started_at - | other.filtered_by_before_started_at, + | new.filtered_by_before_started_at, filtered_by_after_started_at: self.filtered_by_after_started_at - | other.filtered_by_after_started_at, + | new.filtered_by_after_started_at, filtered_by_before_finished_at: self.filtered_by_before_finished_at - | other.filtered_by_before_finished_at, + | new.filtered_by_before_finished_at, filtered_by_after_finished_at: self.filtered_by_after_finished_at - | other.filtered_by_after_finished_at, + | new.filtered_by_after_finished_at, marker: std::marker::PhantomData, }) From ac919df37dff4dda34ae2687517bb4b1a6b2b4cf Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:36:29 +0200 Subject: [PATCH 71/92] simplify the trait a bit more by getting rids of the downcast_aggregate method --- meilisearch/src/analytics/mod.rs | 20 ------------------- .../src/analytics/segment_analytics.rs | 18 ++++++++++++++++- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 27203ea71..d72ab9d01 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -93,26 +93,6 @@ pub trait Aggregate: 'static + mopa::Any + Send { where Self: Sized; - /// An internal helper function, you shouldn't implement it yourself. - /// This function should always be called on the same type. If `this` and `other` - /// aren't the same type behind the function will do nothing and return `None`. 
- fn downcast_aggregate( - old: Box, - new: Box, - ) -> Option> - where - Self: Sized, - { - if old.is::() && new.is::() { - // Both the two following lines cannot fail, but just to be sure we don't crash, we're still avoiding unwrapping - let this = old.downcast::().ok()?; - let other = new.downcast::().ok()?; - Some(Self::aggregate(this, other)) - } else { - None - } - } - /// Converts your structure to the final event that'll be sent to segment. fn into_event(self: Box) -> serde_json::Value; } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 328a3a048..96a0a676c 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -82,6 +82,22 @@ pub struct Event { total: usize, } +/// This function should always be called on the same type. If `this` and `other` +/// aren't the same type the function will do nothing and return `None`. +fn downcast_aggregate( + old: Box, + new: Box, +) -> Option> { + if old.is::() && new.is::() { + // Both the two following lines cannot fail, but just to be sure we don't crash, we're still avoiding unwrapping + let this = old.downcast::().ok()?; + let other = new.downcast::().ok()?; + Some(ConcreteType::aggregate(this, other)) + } else { + None + } +} + impl Message { pub fn new(event: T, request: &HttpRequest) -> Self { Self { @@ -92,7 +108,7 @@ impl Message { user_agents: extract_user_agents(request), total: 1, }, - aggregator_function: T::downcast_aggregate, + aggregator_function: downcast_aggregate::, } } } From af589c85ec4746ef38a38420e0b6d433b1dc86d2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:40:31 +0200 Subject: [PATCH 72/92] reverse all the settings to keep the last one received instead of the first one received in case we receive the same setting multiple times --- .../src/routes/indexes/settings_analytics.rs | 94 +++++++++---------- 1 file changed, 47 insertions(+), 47 deletions(-) diff 
--git a/meilisearch/src/routes/indexes/settings_analytics.rs b/meilisearch/src/routes/indexes/settings_analytics.rs index e7d44fa20..de01b72e8 100644 --- a/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/meilisearch/src/routes/indexes/settings_analytics.rs @@ -45,97 +45,97 @@ impl Aggregate for SettingsAnalytics { fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { ranking_rules: RankingRulesAnalytics { - words_position: self + words_position: new .ranking_rules .words_position - .or(new.ranking_rules.words_position), - typo_position: self.ranking_rules.typo_position.or(new.ranking_rules.typo_position), - proximity_position: self + .or(self.ranking_rules.words_position), + typo_position: new.ranking_rules.typo_position.or(self.ranking_rules.typo_position), + proximity_position: new .ranking_rules .proximity_position - .or(new.ranking_rules.proximity_position), - attribute_position: self + .or(self.ranking_rules.proximity_position), + attribute_position: new .ranking_rules .attribute_position - .or(new.ranking_rules.attribute_position), - sort_position: self.ranking_rules.sort_position.or(new.ranking_rules.sort_position), - exactness_position: self + .or(self.ranking_rules.attribute_position), + sort_position: new.ranking_rules.sort_position.or(self.ranking_rules.sort_position), + exactness_position: new .ranking_rules .exactness_position - .or(new.ranking_rules.exactness_position), - values: self.ranking_rules.values.or(new.ranking_rules.values), + .or(self.ranking_rules.exactness_position), + values: new.ranking_rules.values.or(self.ranking_rules.values), }, searchable_attributes: SearchableAttributesAnalytics { - total: self.searchable_attributes.total.or(new.searchable_attributes.total), - with_wildcard: self + total: new.searchable_attributes.total.or(self.searchable_attributes.total), + with_wildcard: new .searchable_attributes .with_wildcard - .or(new.searchable_attributes.with_wildcard), + .or(self.searchable_attributes.with_wildcard), }, 
displayed_attributes: DisplayedAttributesAnalytics { - total: self.displayed_attributes.total.or(new.displayed_attributes.total), - with_wildcard: self + total: new.displayed_attributes.total.or(self.displayed_attributes.total), + with_wildcard: new .displayed_attributes .with_wildcard - .or(new.displayed_attributes.with_wildcard), + .or(self.displayed_attributes.with_wildcard), }, sortable_attributes: SortableAttributesAnalytics { - total: self.sortable_attributes.total.or(new.sortable_attributes.total), - has_geo: self.sortable_attributes.has_geo.or(new.sortable_attributes.has_geo), + total: new.sortable_attributes.total.or(self.sortable_attributes.total), + has_geo: new.sortable_attributes.has_geo.or(self.sortable_attributes.has_geo), }, filterable_attributes: FilterableAttributesAnalytics { - total: self.filterable_attributes.total.or(new.filterable_attributes.total), - has_geo: self.filterable_attributes.has_geo.or(new.filterable_attributes.has_geo), + total: new.filterable_attributes.total.or(self.filterable_attributes.total), + has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo), }, distinct_attribute: DistinctAttributeAnalytics { set: self.distinct_attribute.set | new.distinct_attribute.set, }, proximity_precision: ProximityPrecisionAnalytics { set: self.proximity_precision.set | new.proximity_precision.set, - value: self.proximity_precision.value.or(new.proximity_precision.value), + value: new.proximity_precision.value.or(self.proximity_precision.value), }, typo_tolerance: TypoToleranceAnalytics { - enabled: self.typo_tolerance.enabled.or(new.typo_tolerance.enabled), - disable_on_attributes: self + enabled: new.typo_tolerance.enabled.or(self.typo_tolerance.enabled), + disable_on_attributes: new .typo_tolerance .disable_on_attributes - .or(new.typo_tolerance.disable_on_attributes), - disable_on_words: self + .or(self.typo_tolerance.disable_on_attributes), + disable_on_words: new .typo_tolerance .disable_on_words - 
.or(new.typo_tolerance.disable_on_words), - min_word_size_for_one_typo: self + .or(self.typo_tolerance.disable_on_words), + min_word_size_for_one_typo: new .typo_tolerance .min_word_size_for_one_typo - .or(new.typo_tolerance.min_word_size_for_one_typo), - min_word_size_for_two_typos: self + .or(self.typo_tolerance.min_word_size_for_one_typo), + min_word_size_for_two_typos: new .typo_tolerance .min_word_size_for_two_typos - .or(new.typo_tolerance.min_word_size_for_two_typos), + .or(self.typo_tolerance.min_word_size_for_two_typos), }, faceting: FacetingAnalytics { - max_values_per_facet: self + max_values_per_facet: new .faceting .max_values_per_facet - .or(new.faceting.max_values_per_facet), - sort_facet_values_by_star_count: self + .or(self.faceting.max_values_per_facet), + sort_facet_values_by_star_count: new .faceting .sort_facet_values_by_star_count - .or(new.faceting.sort_facet_values_by_star_count), - sort_facet_values_by_total: self + .or(self.faceting.sort_facet_values_by_star_count), + sort_facet_values_by_total: new .faceting .sort_facet_values_by_total - .or(new.faceting.sort_facet_values_by_total), + .or(self.faceting.sort_facet_values_by_total), }, pagination: PaginationAnalytics { - max_total_hits: self.pagination.max_total_hits.or(new.pagination.max_total_hits), + max_total_hits: new.pagination.max_total_hits.or(self.pagination.max_total_hits), }, stop_words: StopWordsAnalytics { - total: self.stop_words.total.or(new.stop_words.total), + total: new.stop_words.total.or(self.stop_words.total), }, - synonyms: SynonymsAnalytics { total: self.synonyms.total.or(new.synonyms.total) }, + synonyms: SynonymsAnalytics { total: new.synonyms.total.or(self.synonyms.total) }, embedders: EmbeddersAnalytics { - total: self.embedders.total.or(new.embedders.total), + total: new.embedders.total.or(self.embedders.total), sources: match (self.embedders.sources, new.embedders.sources) { (None, None) => None, (Some(sources), None) | (None, Some(sources)) => Some(sources), @@ 
-167,20 +167,20 @@ impl Aggregate for SettingsAnalytics { }, }, search_cutoff_ms: SearchCutoffMsAnalytics { - search_cutoff_ms: self + search_cutoff_ms: new .search_cutoff_ms .search_cutoff_ms - .or(new.search_cutoff_ms.search_cutoff_ms), + .or(self.search_cutoff_ms.search_cutoff_ms), }, - locales: LocalesAnalytics { locales: self.locales.locales.or(new.locales.locales) }, + locales: LocalesAnalytics { locales: new.locales.locales.or(self.locales.locales) }, dictionary: DictionaryAnalytics { - total: self.dictionary.total.or(new.dictionary.total), + total: new.dictionary.total.or(self.dictionary.total), }, separator_tokens: SeparatorTokensAnalytics { - total: self.separator_tokens.total.or(new.non_separator_tokens.total), + total: new.non_separator_tokens.total.or(self.separator_tokens.total), }, non_separator_tokens: NonSeparatorTokensAnalytics { - total: self.non_separator_tokens.total.or(new.non_separator_tokens.total), + total: new.non_separator_tokens.total.or(self.non_separator_tokens.total), }, }) } From 5675585fe8b4f51eed7b08bb30e1fed0f711e340 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:54:43 +0200 Subject: [PATCH 73/92] move all the searches structures to new modules --- meilisearch/src/analytics/mod.rs | 4 - .../src/analytics/segment_analytics.rs | 868 +----------------- meilisearch/src/routes/indexes/mod.rs | 2 + meilisearch/src/routes/indexes/search.rs | 4 +- .../src/routes/indexes/search_analytics.rs | 485 ++++++++++ meilisearch/src/routes/indexes/similar.rs | 4 +- .../src/routes/indexes/similar_analytics.rs | 235 +++++ meilisearch/src/routes/mod.rs | 1 + meilisearch/src/routes/multi_search.rs | 4 +- .../src/routes/multi_search_analytics.rs | 170 ++++ 10 files changed, 903 insertions(+), 874 deletions(-) create mode 100644 meilisearch/src/routes/indexes/search_analytics.rs create mode 100644 meilisearch/src/routes/indexes/similar_analytics.rs create mode 100644 meilisearch/src/routes/multi_search_analytics.rs diff --git 
a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index d72ab9d01..bd14b0bfa 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -15,13 +15,9 @@ use platform_dirs::AppDirs; // if the feature analytics is enabled we use the real analytics pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; -pub use segment_analytics::SearchAggregator; -pub use segment_analytics::SimilarAggregator; use crate::Opt; -pub use self::segment_analytics::MultiSearchAggregator; - /// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name. #[macro_export] macro_rules! empty_analytics { diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 96a0a676c..7dc746b14 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,5 +1,5 @@ use std::any::TypeId; -use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::fs; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -11,10 +11,8 @@ use byte_unit::Byte; use index_scheduler::IndexScheduler; use meilisearch_auth::{AuthController, AuthFilter}; use meilisearch_types::features::RuntimeTogglableFeatures; -use meilisearch_types::locales::Locale; use meilisearch_types::InstanceUid; use once_cell::sync::Lazy; -use regex::Regex; use segment::message::{Identify, Track, User}; use segment::{AutoBatcher, Batcher, HttpClient}; use serde::Serialize; @@ -25,17 +23,12 @@ use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; -use super::{config_user_id_path, Aggregate, AggregateMethod, MEILISEARCH_CONFIG_PATH}; +use super::{config_user_id_path, Aggregate, MEILISEARCH_CONFIG_PATH}; use crate::option::{ default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot, }; use crate::routes::{create_all_stats, Stats}; 
-use crate::search::{ - FederatedSearch, SearchQuery, SearchQueryWithIndex, SearchResult, SimilarQuery, SimilarResult, - DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO, -}; -use crate::{aggregate_methods, Opt}; +use crate::Opt; const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; @@ -489,858 +482,3 @@ impl Segment { let _ = self.batcher.flush().await; } } - -#[derive(Default)] -pub struct SearchAggregator { - // requests - total_received: usize, - total_succeeded: usize, - total_degraded: usize, - total_used_negative_operator: usize, - time_spent: BinaryHeap, - - // sort - sort_with_geo_point: bool, - // every time a request has a filter, this field must be incremented by the number of terms it contains - sort_sum_of_criteria_terms: usize, - // every time a request has a filter, this field must be incremented by one - sort_total_number_of_criteria: usize, - - // distinct - distinct: bool, - - // filter - filter_with_geo_radius: bool, - filter_with_geo_bounding_box: bool, - // every time a request has a filter, this field must be incremented by the number of terms it contains - filter_sum_of_criteria_terms: usize, - // every time a request has a filter, this field must be incremented by one - filter_total_number_of_criteria: usize, - used_syntax: HashMap, - - // attributes_to_search_on - // every time a search is done using attributes_to_search_on - attributes_to_search_on_total_number_of_uses: usize, - - // q - // The maximum number of terms in a q request - max_terms_number: usize, - - // vector - // The maximum number of floats in a vector request - max_vector_size: usize, - // Whether the semantic ratio passed to a hybrid search equals the default ratio. 
- semantic_ratio: bool, - hybrid: bool, - retrieve_vectors: bool, - - // every time a search is done, we increment the counter linked to the used settings - matching_strategy: HashMap, - - // List of the unique Locales passed as parameter - locales: BTreeSet, - - // pagination - max_limit: usize, - max_offset: usize, - finite_pagination: usize, - - // formatting - max_attributes_to_retrieve: usize, - max_attributes_to_highlight: usize, - highlight_pre_tag: bool, - highlight_post_tag: bool, - max_attributes_to_crop: usize, - crop_marker: bool, - show_matches_position: bool, - crop_length: bool, - - // facets - facets_sum_of_terms: usize, - facets_total_number_of_facets: usize, - - // scoring - show_ranking_score: bool, - show_ranking_score_details: bool, - ranking_score_threshold: bool, - - marker: std::marker::PhantomData, -} - -impl SearchAggregator { - #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SearchQuery) -> Self { - let SearchQuery { - q, - vector, - offset, - limit, - page, - hits_per_page, - attributes_to_retrieve: _, - retrieve_vectors, - attributes_to_crop: _, - crop_length, - attributes_to_highlight: _, - show_matches_position, - show_ranking_score, - show_ranking_score_details, - filter, - sort, - distinct, - facets: _, - highlight_pre_tag, - highlight_post_tag, - crop_marker, - matching_strategy, - attributes_to_search_on, - hybrid, - ranking_score_threshold, - locales, - } = query; - - let mut ret = Self::default(); - - ret.total_received = 1; - - if let Some(ref sort) = sort { - ret.sort_total_number_of_criteria = 1; - ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint(")); - ret.sort_sum_of_criteria_terms = sort.len(); - } - - ret.distinct = distinct.is_some(); - - if let Some(ref filter) = filter { - static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); - ret.filter_total_number_of_criteria = 1; - - let syntax = match filter { - Value::String(_) => "string".to_string(), - 
Value::Array(values) => { - if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { - "mixed".to_string() - } else { - "array".to_string() - } - } - _ => "none".to_string(), - }; - // convert the string to a HashMap - ret.used_syntax.insert(syntax, 1); - - let stringified_filters = filter.to_string(); - ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); - ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); - ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); - } - - // attributes_to_search_on - if attributes_to_search_on.is_some() { - ret.attributes_to_search_on_total_number_of_uses = 1; - } - - if let Some(ref q) = q { - ret.max_terms_number = q.split_whitespace().count(); - } - - if let Some(ref vector) = vector { - ret.max_vector_size = vector.len(); - } - ret.retrieve_vectors |= retrieve_vectors; - - if query.is_finite_pagination() { - let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); - ret.max_limit = limit; - ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit; - ret.finite_pagination = 1; - } else { - ret.max_limit = *limit; - ret.max_offset = *offset; - ret.finite_pagination = 0; - } - - ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); - - if let Some(locales) = locales { - ret.locales = locales.iter().copied().collect(); - } - - ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); - ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); - ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER(); - ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH(); - ret.show_matches_position = *show_matches_position; - - ret.show_ranking_score = *show_ranking_score; - ret.show_ranking_score_details = *show_ranking_score_details; - ret.ranking_score_threshold = ranking_score_threshold.is_some(); - - if let Some(hybrid) = hybrid { - ret.semantic_ratio = hybrid.semantic_ratio != 
DEFAULT_SEMANTIC_RATIO(); - ret.hybrid = true; - } - - ret - } - - pub fn succeed(&mut self, result: &SearchResult) { - let SearchResult { - hits: _, - query: _, - processing_time_ms, - hits_info: _, - semantic_hit_count: _, - facet_distribution: _, - facet_stats: _, - degraded, - used_negative_operator, - } = result; - - self.total_succeeded = self.total_succeeded.saturating_add(1); - if *degraded { - self.total_degraded = self.total_degraded.saturating_add(1); - } - if *used_negative_operator { - self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1); - } - self.time_spent.push(*processing_time_ms as usize); - } -} - -aggregate_methods!( - SearchGET => "Documents Searched GET", - SearchPOST => "Documents Searched POST", -); - -impl Aggregate for SearchAggregator { - fn event_name(&self) -> &'static str { - Method::event_name() - } - - fn aggregate(mut self: Box, new: Box) -> Box { - let Self { - total_received, - total_succeeded, - mut time_spent, - sort_with_geo_point, - sort_sum_of_criteria_terms, - sort_total_number_of_criteria, - distinct, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - attributes_to_search_on_total_number_of_uses, - max_terms_number, - max_vector_size, - retrieve_vectors, - matching_strategy, - max_limit, - max_offset, - finite_pagination, - max_attributes_to_retrieve, - max_attributes_to_highlight, - highlight_pre_tag, - highlight_post_tag, - max_attributes_to_crop, - crop_marker, - show_matches_position, - crop_length, - facets_sum_of_terms, - facets_total_number_of_facets, - show_ranking_score, - show_ranking_score_details, - semantic_ratio, - hybrid, - total_degraded, - total_used_negative_operator, - ranking_score_threshold, - mut locales, - marker: _, - } = *new; - - // request - self.total_received = self.total_received.saturating_add(total_received); - self.total_succeeded = 
self.total_succeeded.saturating_add(total_succeeded); - self.total_degraded = self.total_degraded.saturating_add(total_degraded); - self.total_used_negative_operator = - self.total_used_negative_operator.saturating_add(total_used_negative_operator); - self.time_spent.append(&mut time_spent); - - // sort - self.sort_with_geo_point |= sort_with_geo_point; - self.sort_sum_of_criteria_terms = - self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms); - self.sort_total_number_of_criteria = - self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); - - // distinct - self.distinct |= distinct; - - // filter - self.filter_with_geo_radius |= filter_with_geo_radius; - self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; - self.filter_sum_of_criteria_terms = - self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); - self.filter_total_number_of_criteria = - self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); - for (key, value) in used_syntax.into_iter() { - let used_syntax = self.used_syntax.entry(key).or_insert(0); - *used_syntax = used_syntax.saturating_add(value); - } - - // attributes_to_search_on - self.attributes_to_search_on_total_number_of_uses = self - .attributes_to_search_on_total_number_of_uses - .saturating_add(attributes_to_search_on_total_number_of_uses); - - // q - self.max_terms_number = self.max_terms_number.max(max_terms_number); - - // vector - self.max_vector_size = self.max_vector_size.max(max_vector_size); - self.retrieve_vectors |= retrieve_vectors; - self.semantic_ratio |= semantic_ratio; - self.hybrid |= hybrid; - - // pagination - self.max_limit = self.max_limit.max(max_limit); - self.max_offset = self.max_offset.max(max_offset); - self.finite_pagination += finite_pagination; - - // formatting - self.max_attributes_to_retrieve = - self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); - self.max_attributes_to_highlight = - 
self.max_attributes_to_highlight.max(max_attributes_to_highlight); - self.highlight_pre_tag |= highlight_pre_tag; - self.highlight_post_tag |= highlight_post_tag; - self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop); - self.crop_marker |= crop_marker; - self.show_matches_position |= show_matches_position; - self.crop_length |= crop_length; - - // facets - self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms); - self.facets_total_number_of_facets = - self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets); - - // matching strategy - for (key, value) in matching_strategy.into_iter() { - let matching_strategy = self.matching_strategy.entry(key).or_insert(0); - *matching_strategy = matching_strategy.saturating_add(value); - } - - // scoring - self.show_ranking_score |= show_ranking_score; - self.show_ranking_score_details |= show_ranking_score_details; - self.ranking_score_threshold |= ranking_score_threshold; - - // locales - self.locales.append(&mut locales); - - self - } - - fn into_event(self: Box) -> serde_json::Value { - let Self { - total_received, - total_succeeded, - time_spent, - sort_with_geo_point, - sort_sum_of_criteria_terms, - sort_total_number_of_criteria, - distinct, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - attributes_to_search_on_total_number_of_uses, - max_terms_number, - max_vector_size, - retrieve_vectors, - matching_strategy, - max_limit, - max_offset, - finite_pagination, - max_attributes_to_retrieve, - max_attributes_to_highlight, - highlight_pre_tag, - highlight_post_tag, - max_attributes_to_crop, - crop_marker, - show_matches_position, - crop_length, - facets_sum_of_terms, - facets_total_number_of_facets, - show_ranking_score, - show_ranking_score_details, - semantic_ratio, - hybrid, - total_degraded, - total_used_negative_operator, - 
ranking_score_threshold, - locales, - marker: _, - } = *self; - - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); - - json!({ - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - "total_degraded": total_degraded, - "total_used_negative_operator": total_used_negative_operator, - }, - "sort": { - "with_geoPoint": sort_with_geo_point, - "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), - }, - "distinct": distinct, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "attributes_to_search_on": { - "total_number_of_uses": attributes_to_search_on_total_number_of_uses, - }, - "q": { - "max_terms_number": max_terms_number, - }, - "vector": { - "max_vector_size": max_vector_size, - "retrieve_vectors": retrieve_vectors, - }, - "hybrid": { - "enabled": hybrid, - "semantic_ratio": semantic_ratio, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, - }, - "formatting": { - "max_attributes_to_retrieve": max_attributes_to_retrieve, - "max_attributes_to_highlight": max_attributes_to_highlight, - 
"highlight_pre_tag": highlight_pre_tag, - "highlight_post_tag": highlight_post_tag, - "max_attributes_to_crop": max_attributes_to_crop, - "crop_marker": crop_marker, - "show_matches_position": show_matches_position, - "crop_length": crop_length, - }, - "facets": { - "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), - }, - "matching_strategy": { - "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "locales": locales, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - }, - }) - } -} - -#[derive(Default)] -pub struct MultiSearchAggregator { - // requests - total_received: usize, - total_succeeded: usize, - - // sum of the number of distinct indexes in each single request, use with total_received to compute an avg - total_distinct_index_count: usize, - // number of queries with a single index, use with total_received to compute a proportion - total_single_index: usize, - - // sum of the number of search queries in the requests, use with total_received to compute an average - total_search_count: usize, - - // scoring - show_ranking_score: bool, - show_ranking_score_details: bool, - - // federation - use_federation: bool, -} - -impl MultiSearchAggregator { - pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { - let use_federation = federated_search.federation.is_some(); - - let distinct_indexes: HashSet<_> = federated_search - .queries - .iter() - .map(|query| { - let query = &query; - // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex - let SearchQueryWithIndex { - index_uid, - federation_options: _, - q: _, - vector: _, - offset: _, - limit: _, - page: _, - hits_per_page: _, - attributes_to_retrieve: _, - retrieve_vectors: _, - 
attributes_to_crop: _, - crop_length: _, - attributes_to_highlight: _, - show_ranking_score: _, - show_ranking_score_details: _, - show_matches_position: _, - filter: _, - sort: _, - distinct: _, - facets: _, - highlight_pre_tag: _, - highlight_post_tag: _, - crop_marker: _, - matching_strategy: _, - attributes_to_search_on: _, - hybrid: _, - ranking_score_threshold: _, - locales: _, - } = query; - - index_uid.as_str() - }) - .collect(); - - let show_ranking_score = - federated_search.queries.iter().any(|query| query.show_ranking_score); - let show_ranking_score_details = - federated_search.queries.iter().any(|query| query.show_ranking_score_details); - - Self { - total_received: 1, - total_succeeded: 0, - total_distinct_index_count: distinct_indexes.len(), - total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, - total_search_count: federated_search.queries.len(), - show_ranking_score, - show_ranking_score_details, - use_federation, - } - } - - pub fn succeed(&mut self) { - self.total_succeeded = self.total_succeeded.saturating_add(1); - } -} - -impl Aggregate for MultiSearchAggregator { - fn event_name(&self) -> &'static str { - "Documents Searched by Multi-Search POST" - } - - /// Aggregate one [MultiSearchAggregator] into another. - fn aggregate(self: Box, new: Box) -> Box { - // write the aggregate in a way that will cause a compilation error if a field is added. - - // get ownership of self, replacing it by a default value. 
- let this = *self; - - let total_received = this.total_received.saturating_add(new.total_received); - let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); - let total_distinct_index_count = - this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); - let total_single_index = this.total_single_index.saturating_add(new.total_single_index); - let total_search_count = this.total_search_count.saturating_add(new.total_search_count); - let show_ranking_score = this.show_ranking_score || new.show_ranking_score; - let show_ranking_score_details = - this.show_ranking_score_details || new.show_ranking_score_details; - let use_federation = this.use_federation || new.use_federation; - - Box::new(Self { - total_received, - total_succeeded, - total_distinct_index_count, - total_single_index, - total_search_count, - show_ranking_score, - show_ranking_score_details, - use_federation, - }) - } - - fn into_event(self: Box) -> serde_json::Value { - let Self { - total_received, - total_succeeded, - total_distinct_index_count, - total_single_index, - total_search_count, - show_ranking_score, - show_ranking_score_details, - use_federation, - } = *self; - - json!({ - "requests": { - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "indexes": { - "total_single_index": total_single_index, - "total_distinct_index_count": total_distinct_index_count, - "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early - }, - "searches": { - "total_search_count": total_search_count, - "avg_search_count": (total_search_count as f64) / (total_received as f64), - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - }, - "federation": { - "use_federation": use_federation, - } - }) - } -} - 
-aggregate_methods!( - SimilarPOST => "Similar POST", - SimilarGET => "Similar GET", -); - -#[derive(Default)] -pub struct SimilarAggregator { - // requests - total_received: usize, - total_succeeded: usize, - time_spent: BinaryHeap, - - // filter - filter_with_geo_radius: bool, - filter_with_geo_bounding_box: bool, - // every time a request has a filter, this field must be incremented by the number of terms it contains - filter_sum_of_criteria_terms: usize, - // every time a request has a filter, this field must be incremented by one - filter_total_number_of_criteria: usize, - used_syntax: HashMap, - - // Whether a non-default embedder was specified - retrieve_vectors: bool, - - // pagination - max_limit: usize, - max_offset: usize, - - // formatting - max_attributes_to_retrieve: usize, - - // scoring - show_ranking_score: bool, - show_ranking_score_details: bool, - ranking_score_threshold: bool, - - marker: std::marker::PhantomData, -} - -impl SimilarAggregator { - #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SimilarQuery) -> Self { - let SimilarQuery { - id: _, - embedder: _, - offset, - limit, - attributes_to_retrieve: _, - retrieve_vectors, - show_ranking_score, - show_ranking_score_details, - filter, - ranking_score_threshold, - } = query; - - let mut ret = Self::default(); - - ret.total_received = 1; - - if let Some(ref filter) = filter { - static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); - ret.filter_total_number_of_criteria = 1; - - let syntax = match filter { - Value::String(_) => "string".to_string(), - Value::Array(values) => { - if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { - "mixed".to_string() - } else { - "array".to_string() - } - } - _ => "none".to_string(), - }; - // convert the string to a HashMap - ret.used_syntax.insert(syntax, 1); - - let stringified_filters = filter.to_string(); - ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); - 
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); - ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); - } - - ret.max_limit = *limit; - ret.max_offset = *offset; - - ret.show_ranking_score = *show_ranking_score; - ret.show_ranking_score_details = *show_ranking_score_details; - ret.ranking_score_threshold = ranking_score_threshold.is_some(); - - ret.retrieve_vectors = *retrieve_vectors; - - ret - } - - pub fn succeed(&mut self, result: &SimilarResult) { - let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result; - - self.total_succeeded = self.total_succeeded.saturating_add(1); - - self.time_spent.push(*processing_time_ms as usize); - } -} - -impl Aggregate for SimilarAggregator { - fn event_name(&self) -> &'static str { - Method::event_name() - } - - /// Aggregate one [SimilarAggregator] into another. - fn aggregate(mut self: Box, new: Box) -> Box { - let Self { - total_received, - total_succeeded, - mut time_spent, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - max_limit, - max_offset, - max_attributes_to_retrieve, - show_ranking_score, - show_ranking_score_details, - ranking_score_threshold, - retrieve_vectors, - marker: _, - } = *new; - - // request - self.total_received = self.total_received.saturating_add(total_received); - self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); - self.time_spent.append(&mut time_spent); - - // filter - self.filter_with_geo_radius |= filter_with_geo_radius; - self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; - self.filter_sum_of_criteria_terms = - self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); - self.filter_total_number_of_criteria = - self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); - for (key, value) in used_syntax.into_iter() { - let 
used_syntax = self.used_syntax.entry(key).or_insert(0); - *used_syntax = used_syntax.saturating_add(value); - } - - self.retrieve_vectors |= retrieve_vectors; - - // pagination - self.max_limit = self.max_limit.max(max_limit); - self.max_offset = self.max_offset.max(max_offset); - - // formatting - self.max_attributes_to_retrieve = - self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); - - // scoring - self.show_ranking_score |= show_ranking_score; - self.show_ranking_score_details |= show_ranking_score_details; - self.ranking_score_threshold |= ranking_score_threshold; - - self - } - - fn into_event(self: Box) -> serde_json::Value { - let Self { - total_received, - total_succeeded, - time_spent, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - max_limit, - max_offset, - max_attributes_to_retrieve, - show_ranking_score, - show_ranking_score_details, - ranking_score_threshold, - retrieve_vectors, - marker: _, - } = *self; - - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); - - json!({ - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - 
"vector": { - "retrieve_vectors": retrieve_vectors, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - }, - "formatting": { - "max_attributes_to_retrieve": max_attributes_to_retrieve, - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - } - }) - } -} diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index c8183186d..7d073ec5f 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -28,9 +28,11 @@ use crate::Opt; pub mod documents; pub mod facet_search; pub mod search; +mod search_analytics; pub mod settings; mod settings_analytics; pub mod similar; +mod similar_analytics; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index ac6e23c8f..2f5cb4a36 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -13,13 +13,13 @@ use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use tracing::debug; -use crate::analytics::segment_analytics::{SearchGET, SearchPOST}; -use crate::analytics::{Analytics, SearchAggregator}; +use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; +use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST}; use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, diff --git a/meilisearch/src/routes/indexes/search_analytics.rs 
b/meilisearch/src/routes/indexes/search_analytics.rs new file mode 100644 index 000000000..8bbb1781f --- /dev/null +++ b/meilisearch/src/routes/indexes/search_analytics.rs @@ -0,0 +1,485 @@ +use once_cell::sync::Lazy; +use regex::Regex; +use serde_json::{json, Value}; +use std::collections::{BTreeSet, BinaryHeap, HashMap}; + +use meilisearch_types::locales::Locale; + +use crate::{ + aggregate_methods, + analytics::{Aggregate, AggregateMethod}, + search::{ + SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEMANTIC_RATIO, + }, +}; + +aggregate_methods!( + SearchGET => "Documents Searched GET", + SearchPOST => "Documents Searched POST", +); + +#[derive(Default)] +pub struct SearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + total_degraded: usize, + total_used_negative_operator: usize, + time_spent: BinaryHeap, + + // sort + sort_with_geo_point: bool, + // every time a request has a filter, this field must be incremented by the number of terms it contains + sort_sum_of_criteria_terms: usize, + // every time a request has a filter, this field must be incremented by one + sort_total_number_of_criteria: usize, + + // distinct + distinct: bool, + + // filter + filter_with_geo_radius: bool, + filter_with_geo_bounding_box: bool, + // every time a request has a filter, this field must be incremented by the number of terms it contains + filter_sum_of_criteria_terms: usize, + // every time a request has a filter, this field must be incremented by one + filter_total_number_of_criteria: usize, + used_syntax: HashMap, + + // attributes_to_search_on + // every time a search is done using attributes_to_search_on + attributes_to_search_on_total_number_of_uses: usize, + + // q + // The maximum number of terms in a q request + max_terms_number: usize, + + // vector + // The maximum number of floats in a vector request + max_vector_size: usize, 
+ // Whether the semantic ratio passed to a hybrid search equals the default ratio. + semantic_ratio: bool, + hybrid: bool, + retrieve_vectors: bool, + + // every time a search is done, we increment the counter linked to the used settings + matching_strategy: HashMap, + + // List of the unique Locales passed as parameter + locales: BTreeSet, + + // pagination + max_limit: usize, + max_offset: usize, + finite_pagination: usize, + + // formatting + max_attributes_to_retrieve: usize, + max_attributes_to_highlight: usize, + highlight_pre_tag: bool, + highlight_post_tag: bool, + max_attributes_to_crop: usize, + crop_marker: bool, + show_matches_position: bool, + crop_length: bool, + + // facets + facets_sum_of_terms: usize, + facets_total_number_of_facets: usize, + + // scoring + show_ranking_score: bool, + show_ranking_score_details: bool, + ranking_score_threshold: bool, + + marker: std::marker::PhantomData, +} + +impl SearchAggregator { + #[allow(clippy::field_reassign_with_default)] + pub fn from_query(query: &SearchQuery) -> Self { + let SearchQuery { + q, + vector, + offset, + limit, + page, + hits_per_page, + attributes_to_retrieve: _, + retrieve_vectors, + attributes_to_crop: _, + crop_length, + attributes_to_highlight: _, + show_matches_position, + show_ranking_score, + show_ranking_score_details, + filter, + sort, + distinct, + facets: _, + highlight_pre_tag, + highlight_post_tag, + crop_marker, + matching_strategy, + attributes_to_search_on, + hybrid, + ranking_score_threshold, + locales, + } = query; + + let mut ret = Self::default(); + + ret.total_received = 1; + + if let Some(ref sort) = sort { + ret.sort_total_number_of_criteria = 1; + ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint(")); + ret.sort_sum_of_criteria_terms = sort.len(); + } + + ret.distinct = distinct.is_some(); + + if let Some(ref filter) = filter { + static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); + ret.filter_total_number_of_criteria = 1; + + let 
syntax = match filter { + Value::String(_) => "string".to_string(), + Value::Array(values) => { + if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { + "mixed".to_string() + } else { + "array".to_string() + } + } + _ => "none".to_string(), + }; + // convert the string to a HashMap + ret.used_syntax.insert(syntax, 1); + + let stringified_filters = filter.to_string(); + ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); + ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); + ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); + } + + // attributes_to_search_on + if attributes_to_search_on.is_some() { + ret.attributes_to_search_on_total_number_of_uses = 1; + } + + if let Some(ref q) = q { + ret.max_terms_number = q.split_whitespace().count(); + } + + if let Some(ref vector) = vector { + ret.max_vector_size = vector.len(); + } + ret.retrieve_vectors |= retrieve_vectors; + + if query.is_finite_pagination() { + let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); + ret.max_limit = limit; + ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit; + ret.finite_pagination = 1; + } else { + ret.max_limit = *limit; + ret.max_offset = *offset; + ret.finite_pagination = 0; + } + + ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); + + if let Some(locales) = locales { + ret.locales = locales.iter().copied().collect(); + } + + ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); + ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); + ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER(); + ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH(); + ret.show_matches_position = *show_matches_position; + + ret.show_ranking_score = *show_ranking_score; + ret.show_ranking_score_details = *show_ranking_score_details; + ret.ranking_score_threshold = ranking_score_threshold.is_some(); + + if let Some(hybrid) = 
hybrid { + ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); + ret.hybrid = true; + } + + ret + } + + pub fn succeed(&mut self, result: &SearchResult) { + let SearchResult { + hits: _, + query: _, + processing_time_ms, + hits_info: _, + semantic_hit_count: _, + facet_distribution: _, + facet_stats: _, + degraded, + used_negative_operator, + } = result; + + self.total_succeeded = self.total_succeeded.saturating_add(1); + if *degraded { + self.total_degraded = self.total_degraded.saturating_add(1); + } + if *used_negative_operator { + self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1); + } + self.time_spent.push(*processing_time_ms as usize); + } +} + +impl Aggregate for SearchAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(mut self: Box, new: Box) -> Box { + let Self { + total_received, + total_succeeded, + mut time_spent, + sort_with_geo_point, + sort_sum_of_criteria_terms, + sort_total_number_of_criteria, + distinct, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + attributes_to_search_on_total_number_of_uses, + max_terms_number, + max_vector_size, + retrieve_vectors, + matching_strategy, + max_limit, + max_offset, + finite_pagination, + max_attributes_to_retrieve, + max_attributes_to_highlight, + highlight_pre_tag, + highlight_post_tag, + max_attributes_to_crop, + crop_marker, + show_matches_position, + crop_length, + facets_sum_of_terms, + facets_total_number_of_facets, + show_ranking_score, + show_ranking_score_details, + semantic_ratio, + hybrid, + total_degraded, + total_used_negative_operator, + ranking_score_threshold, + mut locales, + marker: _, + } = *new; + + // request + self.total_received = self.total_received.saturating_add(total_received); + self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); + self.total_degraded = 
self.total_degraded.saturating_add(total_degraded); + self.total_used_negative_operator = + self.total_used_negative_operator.saturating_add(total_used_negative_operator); + self.time_spent.append(&mut time_spent); + + // sort + self.sort_with_geo_point |= sort_with_geo_point; + self.sort_sum_of_criteria_terms = + self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms); + self.sort_total_number_of_criteria = + self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); + + // distinct + self.distinct |= distinct; + + // filter + self.filter_with_geo_radius |= filter_with_geo_radius; + self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; + self.filter_sum_of_criteria_terms = + self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); + self.filter_total_number_of_criteria = + self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); + for (key, value) in used_syntax.into_iter() { + let used_syntax = self.used_syntax.entry(key).or_insert(0); + *used_syntax = used_syntax.saturating_add(value); + } + + // attributes_to_search_on + self.attributes_to_search_on_total_number_of_uses = self + .attributes_to_search_on_total_number_of_uses + .saturating_add(attributes_to_search_on_total_number_of_uses); + + // q + self.max_terms_number = self.max_terms_number.max(max_terms_number); + + // vector + self.max_vector_size = self.max_vector_size.max(max_vector_size); + self.retrieve_vectors |= retrieve_vectors; + self.semantic_ratio |= semantic_ratio; + self.hybrid |= hybrid; + + // pagination + self.max_limit = self.max_limit.max(max_limit); + self.max_offset = self.max_offset.max(max_offset); + self.finite_pagination += finite_pagination; + + // formatting + self.max_attributes_to_retrieve = + self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); + self.max_attributes_to_highlight = + self.max_attributes_to_highlight.max(max_attributes_to_highlight); + 
self.highlight_pre_tag |= highlight_pre_tag; + self.highlight_post_tag |= highlight_post_tag; + self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop); + self.crop_marker |= crop_marker; + self.show_matches_position |= show_matches_position; + self.crop_length |= crop_length; + + // facets + self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms); + self.facets_total_number_of_facets = + self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets); + + // matching strategy + for (key, value) in matching_strategy.into_iter() { + let matching_strategy = self.matching_strategy.entry(key).or_insert(0); + *matching_strategy = matching_strategy.saturating_add(value); + } + + // scoring + self.show_ranking_score |= show_ranking_score; + self.show_ranking_score_details |= show_ranking_score_details; + self.ranking_score_threshold |= ranking_score_threshold; + + // locales + self.locales.append(&mut locales); + + self + } + + fn into_event(self: Box) -> serde_json::Value { + let Self { + total_received, + total_succeeded, + time_spent, + sort_with_geo_point, + sort_sum_of_criteria_terms, + sort_total_number_of_criteria, + distinct, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + attributes_to_search_on_total_number_of_uses, + max_terms_number, + max_vector_size, + retrieve_vectors, + matching_strategy, + max_limit, + max_offset, + finite_pagination, + max_attributes_to_retrieve, + max_attributes_to_highlight, + highlight_pre_tag, + highlight_post_tag, + max_attributes_to_crop, + crop_marker, + show_matches_position, + crop_length, + facets_sum_of_terms, + facets_total_number_of_facets, + show_ranking_score, + show_ranking_score_details, + semantic_ratio, + hybrid, + total_degraded, + total_used_negative_operator, + ranking_score_threshold, + locales, + marker: _, + } = *self; + + // we get all the 
values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentage of value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); + + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + "total_degraded": total_degraded, + "total_used_negative_operator": total_used_negative_operator, + }, + "sort": { + "with_geoPoint": sort_with_geo_point, + "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), + }, + "distinct": distinct, + "filter": { + "with_geoRadius": filter_with_geo_radius, + "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "attributes_to_search_on": { + "total_number_of_uses": attributes_to_search_on_total_number_of_uses, + }, + "q": { + "max_terms_number": max_terms_number, + }, + "vector": { + "max_vector_size": max_vector_size, + "retrieve_vectors": retrieve_vectors, + }, + "hybrid": { + "enabled": hybrid, + "semantic_ratio": semantic_ratio, + }, + "pagination": { + "max_limit": max_limit, + "max_offset": max_offset, + "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, + }, + "formatting": { + "max_attributes_to_retrieve": max_attributes_to_retrieve, + "max_attributes_to_highlight": max_attributes_to_highlight, + "highlight_pre_tag": highlight_pre_tag, + "highlight_post_tag": highlight_post_tag, + 
"max_attributes_to_crop": max_attributes_to_crop, + "crop_marker": crop_marker, + "show_matches_position": show_matches_position, + "crop_length": crop_length, + }, + "facets": { + "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), + }, + "matching_strategy": { + "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "locales": locales, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + "ranking_score_threshold": ranking_score_threshold, + }, + }) + } +} diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index 33df6bdad..79f42f0aa 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -13,10 +13,10 @@ use serde_json::Value; use tracing::debug; use super::ActionPolicy; -use crate::analytics::segment_analytics::{SimilarGET, SimilarPOST}; -use crate::analytics::{Analytics, SimilarAggregator}; +use crate::analytics::Analytics; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST}; use crate::search::{ add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind, SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, diff --git a/meilisearch/src/routes/indexes/similar_analytics.rs b/meilisearch/src/routes/indexes/similar_analytics.rs new file mode 100644 index 000000000..69685a56c --- /dev/null +++ b/meilisearch/src/routes/indexes/similar_analytics.rs @@ -0,0 +1,235 @@ +use std::collections::{BinaryHeap, HashMap}; + +use once_cell::sync::Lazy; +use regex::Regex; +use serde_json::{json, Value}; + +use crate::{ + aggregate_methods, + analytics::{Aggregate, AggregateMethod}, + 
search::{SimilarQuery, SimilarResult}, +}; + +aggregate_methods!( + SimilarPOST => "Similar POST", + SimilarGET => "Similar GET", +); + +#[derive(Default)] +pub struct SimilarAggregator { + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap, + + // filter + filter_with_geo_radius: bool, + filter_with_geo_bounding_box: bool, + // every time a request has a filter, this field must be incremented by the number of terms it contains + filter_sum_of_criteria_terms: usize, + // every time a request has a filter, this field must be incremented by one + filter_total_number_of_criteria: usize, + used_syntax: HashMap, + + // Whether a non-default embedder was specified + retrieve_vectors: bool, + + // pagination + max_limit: usize, + max_offset: usize, + + // formatting + max_attributes_to_retrieve: usize, + + // scoring + show_ranking_score: bool, + show_ranking_score_details: bool, + ranking_score_threshold: bool, + + marker: std::marker::PhantomData, +} + +impl SimilarAggregator { + #[allow(clippy::field_reassign_with_default)] + pub fn from_query(query: &SimilarQuery) -> Self { + let SimilarQuery { + id: _, + embedder: _, + offset, + limit, + attributes_to_retrieve: _, + retrieve_vectors, + show_ranking_score, + show_ranking_score_details, + filter, + ranking_score_threshold, + } = query; + + let mut ret = Self::default(); + + ret.total_received = 1; + + if let Some(ref filter) = filter { + static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); + ret.filter_total_number_of_criteria = 1; + + let syntax = match filter { + Value::String(_) => "string".to_string(), + Value::Array(values) => { + if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { + "mixed".to_string() + } else { + "array".to_string() + } + } + _ => "none".to_string(), + }; + // convert the string to a HashMap + ret.used_syntax.insert(syntax, 1); + + let stringified_filters = filter.to_string(); + ret.filter_with_geo_radius = 
stringified_filters.contains("_geoRadius("); + ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); + ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); + } + + ret.max_limit = *limit; + ret.max_offset = *offset; + + ret.show_ranking_score = *show_ranking_score; + ret.show_ranking_score_details = *show_ranking_score_details; + ret.ranking_score_threshold = ranking_score_threshold.is_some(); + + ret.retrieve_vectors = *retrieve_vectors; + + ret + } + + pub fn succeed(&mut self, result: &SimilarResult) { + let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result; + + self.total_succeeded = self.total_succeeded.saturating_add(1); + + self.time_spent.push(*processing_time_ms as usize); + } +} + +impl Aggregate for SimilarAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + /// Aggregate one [SimilarAggregator] into another. + fn aggregate(mut self: Box, new: Box) -> Box { + let Self { + total_received, + total_succeeded, + mut time_spent, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + max_limit, + max_offset, + max_attributes_to_retrieve, + show_ranking_score, + show_ranking_score_details, + ranking_score_threshold, + retrieve_vectors, + marker: _, + } = *new; + + // request + self.total_received = self.total_received.saturating_add(total_received); + self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); + self.time_spent.append(&mut time_spent); + + // filter + self.filter_with_geo_radius |= filter_with_geo_radius; + self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; + self.filter_sum_of_criteria_terms = + self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); + self.filter_total_number_of_criteria = + self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); + for (key, 
value) in used_syntax.into_iter() { + let used_syntax = self.used_syntax.entry(key).or_insert(0); + *used_syntax = used_syntax.saturating_add(value); + } + + self.retrieve_vectors |= retrieve_vectors; + + // pagination + self.max_limit = self.max_limit.max(max_limit); + self.max_offset = self.max_offset.max(max_offset); + + // formatting + self.max_attributes_to_retrieve = + self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); + + // scoring + self.show_ranking_score |= show_ranking_score; + self.show_ranking_score_details |= show_ranking_score_details; + self.ranking_score_threshold |= ranking_score_threshold; + + self + } + + fn into_event(self: Box) -> serde_json::Value { + let Self { + total_received, + total_succeeded, + time_spent, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + max_limit, + max_offset, + max_attributes_to_retrieve, + show_ranking_score, + show_ranking_score_details, + ranking_score_threshold, + retrieve_vectors, + marker: _, + } = *self; + + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentage of value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); + + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + }, + "filter": { + "with_geoRadius": filter_with_geo_radius, + "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| 
json!(k)).unwrap_or_else(|| json!(null)), + }, + "vector": { + "retrieve_vectors": retrieve_vectors, + }, + "pagination": { + "max_limit": max_limit, + "max_offset": max_offset, + }, + "formatting": { + "max_attributes_to_retrieve": max_attributes_to_retrieve, + }, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + "ranking_score_threshold": ranking_score_threshold, + } + }) + } +} diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index c25aeee70..b7260ea08 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -25,6 +25,7 @@ pub mod indexes; mod logs; mod metrics; mod multi_search; +mod multi_search_analytics; mod snapshot; mod swap_indexes; pub mod tasks; diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 13a39cb44..b7bd31716 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -9,7 +9,7 @@ use meilisearch_types::keys::actions; use serde::Serialize; use tracing::debug; -use crate::analytics::{Analytics, MultiSearchAggregator}; +use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::{AuthenticationError, GuardedData}; @@ -21,6 +21,8 @@ use crate::search::{ }; use crate::search_queue::SearchQueue; +use super::multi_search_analytics::MultiSearchAggregator; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post)))); } diff --git a/meilisearch/src/routes/multi_search_analytics.rs b/meilisearch/src/routes/multi_search_analytics.rs new file mode 100644 index 000000000..be1218399 --- /dev/null +++ b/meilisearch/src/routes/multi_search_analytics.rs @@ -0,0 +1,170 @@ +use std::collections::HashSet; + +use serde_json::json; + +use crate::{ + analytics::Aggregate, 
+ search::{FederatedSearch, SearchQueryWithIndex}, +}; + +#[derive(Default)] +pub struct MultiSearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + + // sum of the number of distinct indexes in each single request, use with total_received to compute an avg + total_distinct_index_count: usize, + // number of queries with a single index, use with total_received to compute a proportion + total_single_index: usize, + + // sum of the number of search queries in the requests, use with total_received to compute an average + total_search_count: usize, + + // scoring + show_ranking_score: bool, + show_ranking_score_details: bool, + + // federation + use_federation: bool, +} + +impl MultiSearchAggregator { + pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { + let use_federation = federated_search.federation.is_some(); + + let distinct_indexes: HashSet<_> = federated_search + .queries + .iter() + .map(|query| { + let query = &query; + // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex + let SearchQueryWithIndex { + index_uid, + federation_options: _, + q: _, + vector: _, + offset: _, + limit: _, + page: _, + hits_per_page: _, + attributes_to_retrieve: _, + retrieve_vectors: _, + attributes_to_crop: _, + crop_length: _, + attributes_to_highlight: _, + show_ranking_score: _, + show_ranking_score_details: _, + show_matches_position: _, + filter: _, + sort: _, + distinct: _, + facets: _, + highlight_pre_tag: _, + highlight_post_tag: _, + crop_marker: _, + matching_strategy: _, + attributes_to_search_on: _, + hybrid: _, + ranking_score_threshold: _, + locales: _, + } = query; + + index_uid.as_str() + }) + .collect(); + + let show_ranking_score = + federated_search.queries.iter().any(|query| query.show_ranking_score); + let show_ranking_score_details = + federated_search.queries.iter().any(|query| query.show_ranking_score_details); + + Self { + total_received: 1, + 
total_succeeded: 0, + total_distinct_index_count: distinct_indexes.len(), + total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, + total_search_count: federated_search.queries.len(), + show_ranking_score, + show_ranking_score_details, + use_federation, + } + } + + pub fn succeed(&mut self) { + self.total_succeeded = self.total_succeeded.saturating_add(1); + } +} + +impl Aggregate for MultiSearchAggregator { + fn event_name(&self) -> &'static str { + "Documents Searched by Multi-Search POST" + } + + /// Aggregate one [MultiSearchAggregator] into another. + fn aggregate(self: Box, new: Box) -> Box { + // write the aggregate in a way that will cause a compilation error if a field is added. + + // get ownership of self, replacing it by a default value. + let this = *self; + + let total_received = this.total_received.saturating_add(new.total_received); + let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); + let total_distinct_index_count = + this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); + let total_single_index = this.total_single_index.saturating_add(new.total_single_index); + let total_search_count = this.total_search_count.saturating_add(new.total_search_count); + let show_ranking_score = this.show_ranking_score || new.show_ranking_score; + let show_ranking_score_details = + this.show_ranking_score_details || new.show_ranking_score_details; + let use_federation = this.use_federation || new.use_federation; + + Box::new(Self { + total_received, + total_succeeded, + total_distinct_index_count, + total_single_index, + total_search_count, + show_ranking_score, + show_ranking_score_details, + use_federation, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + let Self { + total_received, + total_succeeded, + total_distinct_index_count, + total_single_index, + total_search_count, + show_ranking_score, + show_ranking_score_details, + use_federation, + } = *self; + + json!({ + "requests": 
{ + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + }, + "indexes": { + "total_single_index": total_single_index, + "total_distinct_index_count": total_distinct_index_count, + "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early + }, + "searches": { + "total_search_count": total_search_count, + "avg_search_count": (total_search_count as f64) / (total_received as f64), + }, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + }, + "federation": { + "use_federation": use_federation, + } + }) + } +} From b02a72c0c0d68068c5c20e77b4f5c9d2e151375f Mon Sep 17 00:00:00 2001 From: Pedro Turik Firmino Date: Tue, 29 Oct 2024 19:30:11 -0300 Subject: [PATCH 74/92] Applies optimizations to some integration tests --- .../tests/documents/update_documents.rs | 73 +++++++++---------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/crates/meilisearch/tests/documents/update_documents.rs b/crates/meilisearch/tests/documents/update_documents.rs index 195dca914..c0703e81b 100644 --- a/crates/meilisearch/tests/documents/update_documents.rs +++ b/crates/meilisearch/tests/documents/update_documents.rs @@ -23,8 +23,8 @@ async fn error_document_update_create_index_bad_uid() { #[actix_rt::test] async fn document_update_with_primary_key() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = json!([ { @@ -32,15 +32,14 @@ async fn document_update_with_primary_key() { "content": "foo", } ]); - let (_response, code) = index.update_documents(documents, Some("primary")).await; + let (response, code) = index.update_documents(documents, Some("primary")).await; assert_eq!(code, 202); - index.wait_task(0).await; + 
index.wait_task(response.uid()).await.succeeded(); - let (response, code) = index.get_task(0).await; + let (response, code) = index.get_task(response.uid()).await; assert_eq!(code, 200); assert_eq!(response["status"], "succeeded"); - assert_eq!(response["uid"], 0); assert_eq!(response["type"], "documentAdditionOrUpdate"); assert_eq!(response["details"]["indexedDocuments"], 1); assert_eq!(response["details"]["receivedDocuments"], 1); @@ -52,8 +51,8 @@ async fn document_update_with_primary_key() { #[actix_rt::test] async fn update_document() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = json!([ { @@ -62,10 +61,10 @@ async fn update_document() { } ]); - let (_response, code) = index.add_documents(documents, None).await; + let (response, code) = index.add_documents(documents, None).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -77,9 +76,9 @@ async fn update_document() { let (response, code) = index.update_documents(documents, None).await; assert_eq!(code, 202, "response: {}", response); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); - let (response, code) = index.get_task(1).await; + let (response, code) = index.get_task(response.uid()).await; assert_eq!(code, 200); assert_eq!(response["status"], "succeeded"); @@ -96,8 +95,8 @@ async fn update_document() { #[actix_rt::test] async fn update_document_gzip_encoded() { - let server = Server::new().await; - let index = server.index_with_encoder("test", Encoder::Gzip); + let server = Server::new_shared(); + let index = server.unique_index_with_encoder(Encoder::Gzip); let documents = json!([ { @@ -106,10 +105,10 @@ async fn update_document_gzip_encoded() { } ]); - let (_response, code) = index.add_documents(documents, None).await; + let (response, code) = 
index.add_documents(documents, None).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -121,9 +120,9 @@ async fn update_document_gzip_encoded() { let (response, code) = index.update_documents(documents, None).await; assert_eq!(code, 202, "response: {}", response); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); - let (response, code) = index.get_task(1).await; + let (response, code) = index.get_task(response.uid()).await; assert_eq!(code, 200); assert_eq!(response["status"], "succeeded"); @@ -140,12 +139,12 @@ async fn update_document_gzip_encoded() { #[actix_rt::test] async fn update_larger_dataset() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(); - index.update_documents(documents, None).await; - index.wait_task(0).await; - let (response, code) = index.get_task(0).await; + let (task, _code) = index.update_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); + let (response, code) = index.get_task(task.uid()).await; assert_eq!(code, 200); assert_eq!(response["type"], "documentAdditionOrUpdate"); assert_eq!(response["details"]["indexedDocuments"], 77); @@ -158,8 +157,8 @@ async fn update_larger_dataset() { #[actix_rt::test] async fn error_update_documents_bad_document_id() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); index.create(Some("docid")).await; let documents = json!([ { @@ -167,8 +166,8 @@ async fn error_update_documents_bad_document_id() { "content": "foobar" } ]); - index.update_documents(documents, None).await; - let response = index.wait_task(1).await; + let (task, _code) = index.update_documents(documents, 
None).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], json!("failed")); assert_eq!( response["error"]["message"], @@ -186,8 +185,8 @@ async fn error_update_documents_bad_document_id() { #[actix_rt::test] async fn error_update_documents_missing_document_id() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); index.create(Some("docid")).await; let documents = json!([ { @@ -195,8 +194,8 @@ async fn error_update_documents_missing_document_id() { "content": "foobar" } ]); - index.update_documents(documents, None).await; - let response = index.wait_task(1).await; + let (task, _code) = index.update_documents(documents, None).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "failed"); assert_eq!( response["error"]["message"], @@ -212,8 +211,8 @@ async fn error_update_documents_missing_document_id() { #[actix_rt::test] async fn update_faceted_document() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let (response, code) = index .update_settings(json!({ @@ -221,7 +220,7 @@ async fn update_faceted_document() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); let documents: Vec<_> = (0..1000) .map(|id| { @@ -232,10 +231,10 @@ async fn update_faceted_document() { }) .collect(); - let (_response, code) = index.add_documents(documents.into(), None).await; + let (response, code) = index.add_documents(documents.into(), None).await; assert_eq!(code, 202); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -247,7 +246,7 @@ async fn update_faceted_document() { let (response, code) = index.update_documents(documents, None).await; assert_eq!(code, 202, 
"response: {}", response); - index.wait_task(2).await; + index.wait_task(response.uid()).await.succeeded(); index .search(json!({"limit": 10}), |response, code| { From 186326fe40af73956e520e294cedeaeb96093a78 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 4 Nov 2024 16:33:04 +0100 Subject: [PATCH 75/92] update the macos version --- .github/workflows/publish-binaries.yml | 6 +++--- .github/workflows/test-suite.yml | 2 +- bors.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 016a9d282..c53946fea 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -65,9 +65,9 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-12, windows-2022] + os: [macos-13, windows-2022] include: - - os: macos-12 + - os: macos-13 artifact_name: meilisearch asset_name: meilisearch-macos-amd64 - os: windows-2022 @@ -90,7 +90,7 @@ jobs: publish-macos-apple-silicon: name: Publish binary for macOS silicon - runs-on: macos-12 + runs-on: macos-13 needs: check-version strategy: matrix: diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index ce7fb30b6..90fb03538 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -51,7 +51,7 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-12, windows-2022] + os: [macos-13, windows-2022] steps: - uses: actions/checkout@v3 - name: Cache dependencies diff --git a/bors.toml b/bors.toml index 8750ed993..96e9ef65e 100644 --- a/bors.toml +++ b/bors.toml @@ -1,6 +1,6 @@ status = [ 'Tests on ubuntu-20.04', - 'Tests on macos-12', + 'Tests on macos-13', 'Tests on windows-2022', 'Run Clippy', 'Run Rustfmt', From 362836efb7d5924a485fa3e15171257f40214509 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 28 Oct 2024 11:57:02 +0100 Subject: [PATCH 76/92] make an upgrade module where we'll be able to shove each version instead of putting everything in the 
same file --- crates/meilitool/src/main.rs | 428 +-------------------------------- meilitool/src/upgrade/mod.rs | 46 ++++ meilitool/src/upgrade/v1_10.rs | 279 +++++++++++++++++++++ meilitool/src/upgrade/v1_9.rs | 100 ++++++++ 4 files changed, 430 insertions(+), 423 deletions(-) create mode 100644 meilitool/src/upgrade/mod.rs create mode 100644 meilitool/src/upgrade/v1_10.rs create mode 100644 meilitool/src/upgrade/v1_9.rs diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 9dbff2486..ef137f746 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -2,7 +2,7 @@ use std::fs::{read_dir, read_to_string, remove_file, File}; use std::io::BufWriter; use std::path::PathBuf; -use anyhow::{bail, Context}; +use anyhow::Context; use clap::{Parser, Subcommand}; use dump::{DumpWriter, IndexMetadata}; use file_store::FileStore; @@ -10,15 +10,16 @@ use meilisearch_auth::AuthController; use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; -use meilisearch_types::milli::index::{db_name, main_key}; use meilisearch_types::milli::{obkv_to_json, BEU32}; use meilisearch_types::tasks::{Status, Task}; -use meilisearch_types::versioning::{create_version_file, get_version, parse_version}; +use meilisearch_types::versioning::{get_version, parse_version}; use meilisearch_types::Index; use time::macros::format_description; use time::OffsetDateTime; +use upgrade::OfflineUpgrade; use uuid_codec::UuidCodec; +mod upgrade; mod uuid_codec; #[derive(Parser)] @@ -72,7 +73,7 @@ enum Command { /// /// Supported upgrade paths: /// - /// - v1.9.0 -> v1.10.0 + /// - v1.9.0 -> v1.10.0 -> v1.11.0 OfflineUpgrade { #[arg(long)] target_version: String, @@ -96,425 +97,6 @@ fn main() -> anyhow::Result<()> { } } -struct OfflineUpgrade { - db_path: PathBuf, - current_version: (String, String, 
String), - target_version: (String, String, String), -} - -impl OfflineUpgrade { - fn upgrade(self) -> anyhow::Result<()> { - // TODO: if we make this process support more versions, introduce a more flexible way of checking for the version - // currently only supports v1.9 to v1.10 - let (current_major, current_minor, current_patch) = &self.current_version; - - match (current_major.as_str(), current_minor.as_str(), current_patch.as_str()) { - ("1", "9", _) => {} - _ => { - bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9") - } - } - - let (target_major, target_minor, target_patch) = &self.target_version; - - match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { - ("1", "10", _) => {} - _ => { - bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10") - } - } - - println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); - - self.v1_9_to_v1_10()?; - - println!("Writing VERSION file"); - - create_version_file(&self.db_path, target_major, target_minor, target_patch) - .context("while writing VERSION file after the upgrade")?; - - println!("Success"); - - Ok(()) - } - - fn v1_9_to_v1_10(&self) -> anyhow::Result<()> { - // 2 changes here - - // 1. date format. needs to be done before opening the Index - // 2. REST embedders. 
We don't support this case right now, so bail - - let index_scheduler_path = self.db_path.join("tasks"); - let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } - .with_context(|| { - format!("While trying to open {:?}", index_scheduler_path.display()) - })?; - - let mut sched_wtxn = env.write_txn()?; - - let index_mapping: Database = - try_opening_database(&env, &sched_wtxn, "index-mapping")?; - - let index_stats: Database = - try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| { - format!("While trying to open {:?}", index_scheduler_path.display()) - })?; - - let index_count = - index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?; - - // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn - // 1. immutably for the iteration - // 2. mutably for updating index stats - let indexes: Vec<_> = index_mapping - .iter(&sched_wtxn)? - .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) - .collect(); - - let mut rest_embedders = Vec::new(); - - let mut unwrapped_indexes = Vec::new(); - - // check that update can take place - for (index_index, result) in indexes.into_iter().enumerate() { - let (uid, uuid) = result?; - let index_path = self.db_path.join("indexes").join(uuid.to_string()); - - println!( - "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", - index_index + 1, - index_path.display() - ); - - let index_env = unsafe { - // FIXME: fetch the 25 magic number from the index file - EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { - format!("while opening index {uid} at '{}'", index_path.display()) - })? 
- }; - - let index_txn = index_env.read_txn().with_context(|| { - format!( - "while obtaining a write transaction for index {uid} at {}", - index_path.display() - ) - })?; - - println!("\t- Checking for incompatible embedders (REST embedders)"); - let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?; - - if rest_embedders_for_index.is_empty() { - unwrapped_indexes.push((uid, uuid)); - } else { - // no need to add to unwrapped indexes because we'll exit early - rest_embedders.push((uid, rest_embedders_for_index)); - } - } - - if !rest_embedders.is_empty() { - let rest_embedders = rest_embedders - .into_iter() - .flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders)) - .map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`")) - .collect::>() - .join("\n"); - bail!("The update cannot take place because there are REST embedder(s). Remove them before proceeding with the update:\n{rest_embedders}\n\n\ - The database has not been modified and is still a valid v1.9 database."); - } - - println!("Update can take place, updating"); - - for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() { - let index_path = self.db_path.join("indexes").join(uuid.to_string()); - - println!( - "[{}/{index_count}]Updating index `{uid}` at `{}`", - index_index + 1, - index_path.display() - ); - - let index_env = unsafe { - // FIXME: fetch the 25 magic number from the index file - EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { - format!("while opening index {uid} at '{}'", index_path.display()) - })? 
- }; - - let mut index_wtxn = index_env.write_txn().with_context(|| { - format!( - "while obtaining a write transaction for index `{uid}` at `{}`", - index_path.display() - ) - })?; - - println!("\t- Updating index stats"); - update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?; - println!("\t- Updating date format"); - update_date_format(&uid, &index_env, &mut index_wtxn)?; - - index_wtxn.commit().with_context(|| { - format!( - "while committing the write txn for index `{uid}` at {}", - index_path.display() - ) - })?; - } - - sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?; - - println!("Upgrading database succeeded"); - - Ok(()) - } -} - -pub mod v1_9 { - pub type FieldDistribution = std::collections::BTreeMap; - - /// The statistics that can be computed from an `Index` object. - #[derive(serde::Serialize, serde::Deserialize, Debug)] - pub struct IndexStats { - /// Number of documents in the index. - pub number_of_documents: u64, - /// Size taken up by the index' DB, in bytes. - /// - /// This includes the size taken by both the used and free pages of the DB, and as the free pages - /// are not returned to the disk after a deletion, this number is typically larger than - /// `used_database_size` that only includes the size of the used pages. - pub database_size: u64, - /// Size taken by the used pages of the index' DB, in bytes. - /// - /// As the DB backend does not return to the disk the pages that are not currently used by the DB, - /// this value is typically smaller than `database_size`. - pub used_database_size: u64, - /// Association of every field name with the number of times it occurs in the documents. - pub field_distribution: FieldDistribution, - /// Creation date of the index. - pub created_at: time::OffsetDateTime, - /// Date of the last update of the index. 
- pub updated_at: time::OffsetDateTime, - } - - use serde::{Deserialize, Serialize}; - - #[derive(Debug, Deserialize, Serialize)] - pub struct IndexEmbeddingConfig { - pub name: String, - pub config: EmbeddingConfig, - } - - #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] - pub struct EmbeddingConfig { - /// Options of the embedder, specific to each kind of embedder - pub embedder_options: EmbedderOptions, - } - - /// Options of an embedder, specific to each kind of embedder. - #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] - pub enum EmbedderOptions { - HuggingFace(hf::EmbedderOptions), - OpenAi(openai::EmbedderOptions), - Ollama(ollama::EmbedderOptions), - UserProvided(manual::EmbedderOptions), - Rest(rest::EmbedderOptions), - } - - impl Default for EmbedderOptions { - fn default() -> Self { - Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None }) - } - } - - mod hf { - #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] - pub struct EmbedderOptions { - pub model: String, - pub revision: Option, - } - } - mod openai { - - #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] - pub struct EmbedderOptions { - pub api_key: Option, - pub dimensions: Option, - } - } - mod ollama { - #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] - pub struct EmbedderOptions { - pub embedding_model: String, - pub url: Option, - pub api_key: Option, - } - } - mod manual { - #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] - pub struct EmbedderOptions { - pub dimensions: usize, - } - } - mod rest { - #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)] - pub struct EmbedderOptions { - pub api_key: Option, - pub dimensions: Option, - pub url: String, - pub input_field: Vec, - // path to the array of embeddings - pub path_to_embeddings: Vec, - // shape of 
a single embedding - pub embedding_object: Vec, - } - } - - pub type OffsetDateTime = time::OffsetDateTime; -} - -pub mod v1_10 { - use crate::v1_9; - - pub type FieldDistribution = std::collections::BTreeMap; - - /// The statistics that can be computed from an `Index` object. - #[derive(serde::Serialize, serde::Deserialize, Debug)] - pub struct IndexStats { - /// Number of documents in the index. - pub number_of_documents: u64, - /// Size taken up by the index' DB, in bytes. - /// - /// This includes the size taken by both the used and free pages of the DB, and as the free pages - /// are not returned to the disk after a deletion, this number is typically larger than - /// `used_database_size` that only includes the size of the used pages. - pub database_size: u64, - /// Size taken by the used pages of the index' DB, in bytes. - /// - /// As the DB backend does not return to the disk the pages that are not currently used by the DB, - /// this value is typically smaller than `database_size`. - pub used_database_size: u64, - /// Association of every field name with the number of times it occurs in the documents. - pub field_distribution: FieldDistribution, - /// Creation date of the index. - #[serde(with = "time::serde::rfc3339")] - pub created_at: time::OffsetDateTime, - /// Date of the last update of the index. 
- #[serde(with = "time::serde::rfc3339")] - pub updated_at: time::OffsetDateTime, - } - - impl From for IndexStats { - fn from( - v1_9::IndexStats { - number_of_documents, - database_size, - used_database_size, - field_distribution, - created_at, - updated_at, - }: v1_9::IndexStats, - ) -> Self { - IndexStats { - number_of_documents, - database_size, - used_database_size, - field_distribution, - created_at, - updated_at, - } - } - } - - #[derive(serde::Serialize, serde::Deserialize)] - #[serde(transparent)] - pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime); -} - -fn update_index_stats( - index_stats: Database, - index_uid: &str, - index_uuid: uuid::Uuid, - sched_wtxn: &mut RwTxn, -) -> anyhow::Result<()> { - let ctx = || format!("while updating index stats for index `{index_uid}`"); - - let stats: Option = index_stats - .remap_data_type::>() - .get(sched_wtxn, &index_uuid) - .with_context(ctx)?; - - if let Some(stats) = stats { - let stats: v1_10::IndexStats = stats.into(); - - index_stats - .remap_data_type::>() - .put(sched_wtxn, &index_uuid, &stats) - .with_context(ctx)?; - } - - Ok(()) -} - -fn update_date_format( - index_uid: &str, - index_env: &Env, - index_wtxn: &mut RwTxn, -) -> anyhow::Result<()> { - let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) - .with_context(|| format!("while updating date format for index `{index_uid}`"))?; - - date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?; - date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?; - - Ok(()) -} - -fn find_rest_embedders( - index_uid: &str, - index_env: &Env, - index_txn: &RoTxn, -) -> anyhow::Result> { - let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) - .with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?; - - let mut rest_embedders = vec![]; - - for config in main - .remap_types::>>() - .get(index_txn, main_key::EMBEDDING_CONFIGS)? 
- .unwrap_or_default() - { - if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options { - rest_embedders.push(config.name); - } - } - - Ok(rest_embedders) -} - -fn date_round_trip( - wtxn: &mut RwTxn, - index_uid: &str, - db: Database, - key: &str, -) -> anyhow::Result<()> { - let datetime = - db.remap_types::>().get(wtxn, key).with_context( - || format!("could not read `{key}` while updating date format for index `{index_uid}`"), - )?; - - if let Some(datetime) = datetime { - db.remap_types::>() - .put(wtxn, key, &v1_10::OffsetDateTime(datetime)) - .with_context(|| { - format!( - "could not write `{key}` while updating date format for index `{index_uid}`" - ) - })?; - } - - Ok(()) -} - /// Clears the task queue located at `db_path`. fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { let path = db_path.join("tasks"); diff --git a/meilitool/src/upgrade/mod.rs b/meilitool/src/upgrade/mod.rs new file mode 100644 index 000000000..053c61c14 --- /dev/null +++ b/meilitool/src/upgrade/mod.rs @@ -0,0 +1,46 @@ +mod v1_10; +mod v1_9; + +use std::path::PathBuf; + +use anyhow::{bail, Context}; +use meilisearch_types::versioning::create_version_file; + +use v1_10::v1_9_to_v1_10; + +pub struct OfflineUpgrade { + pub db_path: PathBuf, + pub current_version: (String, String, String), + pub target_version: (String, String, String), +} + +impl OfflineUpgrade { + pub fn upgrade(self) -> anyhow::Result<()> { + let (current_major, current_minor, current_patch) = &self.current_version; + let (target_major, target_minor, target_patch) = &self.target_version; + + println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); + + match ( + (current_major.as_str(), current_minor.as_str(), current_patch.as_str()), + (target_major.as_str(), target_minor.as_str(), target_patch.as_str()), + ) { + (("1", "9", _), ("1", "10", _)) => v1_9_to_v1_10(&self.db_path)?, + ((major, minor, _), _) if major != "1" && minor 
!= "9" => + bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9"), + (_, (major, minor, _)) if major != "1" && minor != "10" => + bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10"), + _ => + bail!("Unsupported upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}. Can only upgrade from v1.9 to v1.10"), + } + + println!("Writing VERSION file"); + + create_version_file(&self.db_path, target_major, target_minor, target_patch) + .context("while writing VERSION file after the upgrade")?; + + println!("Success"); + + Ok(()) + } +} diff --git a/meilitool/src/upgrade/v1_10.rs b/meilitool/src/upgrade/v1_10.rs new file mode 100644 index 000000000..96af99c39 --- /dev/null +++ b/meilitool/src/upgrade/v1_10.rs @@ -0,0 +1,279 @@ +use anyhow::bail; +use std::path::Path; + +use anyhow::Context; +use meilisearch_types::{ + heed::{ + types::{SerdeJson, Str}, + Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, + }, + milli::index::{db_name, main_key}, +}; + +use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; + +use super::v1_9; + +pub type FieldDistribution = std::collections::BTreeMap; + +/// The statistics that can be computed from an `Index` object. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. 
+ /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. + #[serde(with = "time::serde::rfc3339")] + pub created_at: time::OffsetDateTime, + /// Date of the last update of the index. + #[serde(with = "time::serde::rfc3339")] + pub updated_at: time::OffsetDateTime, +} + +impl From for IndexStats { + fn from( + v1_9::IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at, + updated_at, + }: v1_9::IndexStats, + ) -> Self { + IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at, + updated_at, + } + } +} + +#[derive(serde::Serialize, serde::Deserialize)] +#[serde(transparent)] +pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime); + +fn update_index_stats( + index_stats: Database, + index_uid: &str, + index_uuid: uuid::Uuid, + sched_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let ctx = || format!("while updating index stats for index `{index_uid}`"); + + let stats: Option = index_stats + .remap_data_type::>() + .get(sched_wtxn, &index_uuid) + .with_context(ctx)?; + + if let Some(stats) = stats { + let stats: self::IndexStats = stats.into(); + + index_stats + .remap_data_type::>() + .put(sched_wtxn, &index_uuid, &stats) + .with_context(ctx)?; + } + + Ok(()) +} + +fn update_date_format( + index_uid: &str, + index_env: &Env, + index_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) + .with_context(|| format!("while updating date format for index `{index_uid}`"))?; + + date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?; + 
date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?; + + Ok(()) +} + +fn find_rest_embedders( + index_uid: &str, + index_env: &Env, + index_txn: &RoTxn, +) -> anyhow::Result> { + let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) + .with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?; + + let mut rest_embedders = vec![]; + + for config in main + .remap_types::>>() + .get(index_txn, main_key::EMBEDDING_CONFIGS)? + .unwrap_or_default() + { + if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options { + rest_embedders.push(config.name); + } + } + + Ok(rest_embedders) +} + +fn date_round_trip( + wtxn: &mut RwTxn, + index_uid: &str, + db: Database, + key: &str, +) -> anyhow::Result<()> { + let datetime = + db.remap_types::>().get(wtxn, key).with_context( + || format!("could not read `{key}` while updating date format for index `{index_uid}`"), + )?; + + if let Some(datetime) = datetime { + db.remap_types::>() + .put(wtxn, key, &self::OffsetDateTime(datetime)) + .with_context(|| { + format!( + "could not write `{key}` while updating date format for index `{index_uid}`" + ) + })?; + } + + Ok(()) +} + +pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> { + // 2 changes here + + // 1. date format. needs to be done before opening the Index + // 2. REST embedders. 
We don't support this case right now, so bail + + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + let mut sched_wtxn = env.write_txn()?; + + let index_mapping: Database = + try_opening_database(&env, &sched_wtxn, "index-mapping")?; + + let index_stats: Database = + try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| { + format!("While trying to open {:?}", index_scheduler_path.display()) + })?; + + let index_count = + index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?; + + // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn + // 1. immutably for the iteration + // 2. mutably for updating index stats + let indexes: Vec<_> = index_mapping + .iter(&sched_wtxn)? + .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) + .collect(); + + let mut rest_embedders = Vec::new(); + + let mut unwrapped_indexes = Vec::new(); + + // check that update can take place + for (index_index, result) in indexes.into_iter().enumerate() { + let (uid, uuid) = result?; + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? 
+ }; + + let index_txn = index_env.read_txn().with_context(|| { + format!( + "while obtaining a write transaction for index {uid} at {}", + index_path.display() + ) + })?; + + println!("\t- Checking for incompatible embedders (REST embedders)"); + let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?; + + if rest_embedders_for_index.is_empty() { + unwrapped_indexes.push((uid, uuid)); + } else { + // no need to add to unwrapped indexes because we'll exit early + rest_embedders.push((uid, rest_embedders_for_index)); + } + } + + if !rest_embedders.is_empty() { + let rest_embedders = rest_embedders + .into_iter() + .flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders)) + .map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`")) + .collect::>() + .join("\n"); + bail!("The update cannot take place because there are REST embedder(s). Remove them before proceeding with the update:\n{rest_embedders}\n\n\ + The database has not been modified and is still a valid v1.9 database."); + } + + println!("Update can take place, updating"); + + for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() { + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Updating index `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? 
+ }; + + let mut index_wtxn = index_env.write_txn().with_context(|| { + format!( + "while obtaining a write transaction for index `{uid}` at `{}`", + index_path.display() + ) + })?; + + println!("\t- Updating index stats"); + update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?; + println!("\t- Updating date format"); + update_date_format(&uid, &index_env, &mut index_wtxn)?; + + index_wtxn.commit().with_context(|| { + format!("while committing the write txn for index `{uid}` at {}", index_path.display()) + })?; + } + + sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?; + + println!("Upgrading database succeeded"); + + Ok(()) +} diff --git a/meilitool/src/upgrade/v1_9.rs b/meilitool/src/upgrade/v1_9.rs new file mode 100644 index 000000000..faa2d9814 --- /dev/null +++ b/meilitool/src/upgrade/v1_9.rs @@ -0,0 +1,100 @@ +use serde::{Deserialize, Serialize}; + +pub type FieldDistribution = std::collections::BTreeMap; + +/// The statistics that can be computed from an `Index` object. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. 
+ pub created_at: time::OffsetDateTime, + /// Date of the last update of the index. + pub updated_at: time::OffsetDateTime, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct IndexEmbeddingConfig { + pub name: String, + pub config: EmbeddingConfig, +} + +#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] +pub struct EmbeddingConfig { + /// Options of the embedder, specific to each kind of embedder + pub embedder_options: EmbedderOptions, +} + +/// Options of an embedder, specific to each kind of embedder. +#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +pub enum EmbedderOptions { + HuggingFace(hf::EmbedderOptions), + OpenAi(openai::EmbedderOptions), + Ollama(ollama::EmbedderOptions), + UserProvided(manual::EmbedderOptions), + Rest(rest::EmbedderOptions), +} + +impl Default for EmbedderOptions { + fn default() -> Self { + Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None }) + } +} + +mod hf { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub model: String, + pub revision: Option, + } +} +mod openai { + + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub api_key: Option, + pub dimensions: Option, + } +} +mod ollama { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub embedding_model: String, + pub url: Option, + pub api_key: Option, + } +} +mod manual { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub dimensions: usize, + } +} +mod rest { + #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)] + pub struct EmbedderOptions { + pub api_key: Option, + pub dimensions: Option, + pub url: String, + pub input_field: Vec, + // path to the array of embeddings + pub 
path_to_embeddings: Vec, + // shape of a single embedding + pub embedding_object: Vec, + } +} + +pub type OffsetDateTime = time::OffsetDateTime; From ddd03e9b370f145787bca447b8791aeff5485c94 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 29 Oct 2024 02:46:14 +0100 Subject: [PATCH 77/92] implement the upgrade from v1.10 to v1.11 in meilitool --- Cargo.lock | 28 +++++++++++-- crates/meilitool/Cargo.toml | 2 + crates/milli/Cargo.toml | 2 +- meilitool/src/upgrade/mod.rs | 60 +++++++++++++++++++-------- meilitool/src/upgrade/v1_10.rs | 7 +++- meilitool/src/upgrade/v1_11.rs | 76 ++++++++++++++++++++++++++++++++++ 6 files changed, 150 insertions(+), 25 deletions(-) create mode 100644 meilitool/src/upgrade/v1_11.rs diff --git a/Cargo.lock b/Cargo.lock index 500f28454..43a93bb05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -404,6 +404,25 @@ dependencies = [ "thiserror", ] +[[package]] +name = "arroy" +version = "0.5.0" +source = "git+https://github.com/meilisearch/arroy/?rev=3908c9e#3908c9edfba77ba18cc50bda41c88166ba5ebd37" +dependencies = [ + "bytemuck", + "byteorder", + "heed", + "log", + "memmap2", + "nohash", + "ordered-float", + "rand", + "rayon", + "roaring", + "tempfile", + "thiserror", +] + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -707,9 +726,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" [[package]] name = "bytemuck" -version = "1.16.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" dependencies = [ "bytemuck_derive", ] @@ -2556,7 +2575,7 @@ name = "index-scheduler" version = "1.11.0" dependencies = [ "anyhow", - "arroy", + "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "big_s", "bincode", "crossbeam", @@ -3517,6 +3536,7 @@ name = "meilitool" version = "1.11.0" dependencies = [ "anyhow", + "arroy 
0.5.0 (git+https://github.com/meilisearch/arroy/?rev=3908c9e)", "clap", "dump", "file-store", @@ -3547,7 +3567,7 @@ dependencies = [ name = "milli" version = "1.11.0" dependencies = [ - "arroy", + "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "big_s", "bimap", "bincode", diff --git a/crates/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml index ce6c1ad5b..937a484e2 100644 --- a/crates/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -18,3 +18,5 @@ meilisearch-types = { path = "../meilisearch-types" } serde = { version = "1.0.209", features = ["derive"] } time = { version = "0.3.36", features = ["formatting"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } +arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", rev = "3908c9e" } + diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index df0e59496..7b43fbf33 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -15,7 +15,7 @@ license.workspace = true bimap = { version = "0.6.3", features = ["serde"] } bincode = "1.3.3" bstr = "1.9.1" -bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } +bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] } byteorder = "1.5.0" charabia = { version = "0.9.1", default-features = false } concat-arrays = "0.1.2" diff --git a/meilitool/src/upgrade/mod.rs b/meilitool/src/upgrade/mod.rs index 053c61c14..9a1e4286f 100644 --- a/meilitool/src/upgrade/mod.rs +++ b/meilitool/src/upgrade/mod.rs @@ -1,13 +1,16 @@ mod v1_10; +mod v1_11; mod v1_9; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use anyhow::{bail, Context}; use meilisearch_types::versioning::create_version_file; use v1_10::v1_9_to_v1_10; +use crate::upgrade::v1_11::v1_10_to_v1_11; + pub struct OfflineUpgrade { pub db_path: PathBuf, pub current_version: (String, String, String), @@ -16,29 +19,50 @@ pub struct OfflineUpgrade { impl OfflineUpgrade { pub fn upgrade(self) -> 
anyhow::Result<()> { + let upgrade_list = [ + (v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"), + (v1_10_to_v1_11, "1", "11", "0"), + ]; + let (current_major, current_minor, current_patch) = &self.current_version; + + let start_at = match ( + current_major.as_str(), + current_minor.as_str(), + current_patch.as_str(), + ) { + ("1", "9", _) => 0, + ("1", "10", _) => 1, + _ => { + bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9") + } + }; + let (target_major, target_minor, target_patch) = &self.target_version; - println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); + let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { + ("v1", "10", _) => 0, + ("v1", "11", _) => 1, + _ => { + bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.11") + } + }; - match ( - (current_major.as_str(), current_minor.as_str(), current_patch.as_str()), - (target_major.as_str(), target_minor.as_str(), target_patch.as_str()), - ) { - (("1", "9", _), ("1", "10", _)) => v1_9_to_v1_10(&self.db_path)?, - ((major, minor, _), _) if major != "1" && minor != "9" => - bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9"), - (_, (major, minor, _)) if major != "1" && minor != "10" => - bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10"), - _ => - bail!("Unsupported upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}. 
Can only upgrade from v1.9 to v1.10"), + println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); + + #[allow(clippy::needless_range_loop)] + for index in start_at..=ends_at { + let (func, major, minor, patch) = upgrade_list[index]; + (func)(&self.db_path)?; + println!("Done"); + // We're writing the version file just in case an issue arise _while_ upgrading. + // We don't want the DB to fail in an unknown state. + println!("Writing VERSION file"); + + create_version_file(&self.db_path, major, minor, patch) + .context("while writing VERSION file after the upgrade")?; } - println!("Writing VERSION file"); - - create_version_file(&self.db_path, target_major, target_minor, target_patch) - .context("while writing VERSION file after the upgrade")?; - println!("Success"); Ok(()) diff --git a/meilitool/src/upgrade/v1_10.rs b/meilitool/src/upgrade/v1_10.rs index 96af99c39..99fe104e3 100644 --- a/meilitool/src/upgrade/v1_10.rs +++ b/meilitool/src/upgrade/v1_10.rs @@ -79,7 +79,8 @@ fn update_index_stats( let stats: Option = index_stats .remap_data_type::>() .get(sched_wtxn, &index_uuid) - .with_context(ctx)?; + .with_context(ctx) + .with_context(|| "While reading value")?; if let Some(stats) = stats { let stats: self::IndexStats = stats.into(); @@ -87,7 +88,8 @@ fn update_index_stats( index_stats .remap_data_type::>() .put(sched_wtxn, &index_uuid, &stats) - .with_context(ctx)?; + .with_context(ctx) + .with_context(|| "While writing value")?; } Ok(()) @@ -155,6 +157,7 @@ fn date_round_trip( } pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> { + println!("Upgrading from v1.9.0 to v1.10.0"); // 2 changes here // 1. date format. needs to be done before opening the Index diff --git a/meilitool/src/upgrade/v1_11.rs b/meilitool/src/upgrade/v1_11.rs new file mode 100644 index 000000000..26c4234f6 --- /dev/null +++ b/meilitool/src/upgrade/v1_11.rs @@ -0,0 +1,76 @@ +//! 
The breaking changes that happened between the v1.10 and the v1.11 are: +//! - Arroy went from the v0.4.0 to the v0.5.0, see this release note to get the whole context: https://github.com/meilisearch/arroy/releases/tag/v0.5.0 +//! - The `angular` distance has been renamed to `cosine` => We only need to update the string in the metadata. +//! - Reorganize the `NodeId` to make the appending of vectors work => We'll have to update the keys of almost all items in the DB. +//! - Store the list of updated IDs directly in LMDB instead of a roaring bitmap => This shouldn't be an issue since we are never supposed to commit this roaring bitmap, but it's not forbidden by arroy so ensuring it works is probably better than anything. + +use std::path::Path; + +use anyhow::Context; +use meilisearch_types::{ + heed::{types::Str, Database, EnvOpenOptions}, + milli::index::db_name, +}; + +use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; + +pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { + println!("Upgrading from v1.10.0 to v1.11.0"); + + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + let sched_rtxn = env.read_txn()?; + + let index_mapping: Database = + try_opening_database(&env, &sched_rtxn, "index-mapping")?; + + let index_count = + index_mapping.len(&sched_rtxn).context("while reading the number of indexes")?; + + let indexes: Vec<_> = index_mapping + .iter(&sched_rtxn)? 
+ .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) + .collect(); + + // check that update can take place + for (index_index, result) in indexes.into_iter().enumerate() { + let (uid, uuid) = result?; + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? + }; + + let index_rtxn = index_env.read_txn().with_context(|| { + format!( + "while obtaining a read transaction for index {uid} at {}", + index_path.display() + ) + })?; + let mut index_wtxn = index_env.write_txn().with_context(|| { + format!( + "while obtaining a write transaction for index {uid} at {}", + index_path.display() + ) + })?; + + let database = try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY) + .with_context(|| format!("while updating date format for index `{uid}`"))?; + + arroy_v04_to_v05::ugrade_from_prev_version(&index_rtxn, &mut index_wtxn, database)?; + + index_wtxn.commit()?; + } + + Ok(()) +} From a9b61c84349e23cf34ce9ed342ec46339c36eb9a Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 29 Oct 2024 02:51:26 +0100 Subject: [PATCH 78/92] fix the version parsing and improve error handling --- meilitool/src/upgrade/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/meilitool/src/upgrade/mod.rs b/meilitool/src/upgrade/mod.rs index 9a1e4286f..ae095b6bd 100644 --- a/meilitool/src/upgrade/mod.rs +++ b/meilitool/src/upgrade/mod.rs @@ -41,8 +41,11 @@ impl OfflineUpgrade { let (target_major, target_minor, target_patch) = &self.target_version; let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { - ("v1", "10", _) => 0, - ("v1", "11", _) => 1, + ("1", "10", _) => 0, + ("1", "11", _) => 1, 
+ (major, _, _) if major.starts_with('v') => { + bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.") + } _ => { bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.11") } From 690eb42fc09db277d8426aeaa1d54e54001e1501 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 29 Oct 2024 03:27:26 +0100 Subject: [PATCH 79/92] update the version of arroy --- Cargo.lock | 4 ++-- crates/meilitool/Cargo.toml | 3 +-- meilitool/src/upgrade/v1_11.rs | 16 +++++++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 43a93bb05..fd14a4a7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -407,7 +407,7 @@ dependencies = [ [[package]] name = "arroy" version = "0.5.0" -source = "git+https://github.com/meilisearch/arroy/?rev=3908c9e#3908c9edfba77ba18cc50bda41c88166ba5ebd37" +source = "git+https://github.com/meilisearch/arroy/?rev=32670e7dd8b93640fcb53261ace89bda1c06497b#32670e7dd8b93640fcb53261ace89bda1c06497b" dependencies = [ "bytemuck", "byteorder", @@ -3536,7 +3536,7 @@ name = "meilitool" version = "1.11.0" dependencies = [ "anyhow", - "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?rev=3908c9e)", + "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?rev=32670e7dd8b93640fcb53261ace89bda1c06497b)", "clap", "dump", "file-store", diff --git a/crates/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml index 937a484e2..693de6da8 100644 --- a/crates/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -18,5 +18,4 @@ meilisearch-types = { path = "../meilisearch-types" } serde = { version = "1.0.209", features = ["derive"] } time = { version = "0.3.36", features = ["formatting"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } -arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", rev = "3908c9e" } - +arroy_v04_to_v05 = { package = "arroy", git = 
"https://github.com/meilisearch/arroy/", rev = "32670e7dd8b93640fcb53261ace89bda1c06497b" } diff --git a/meilitool/src/upgrade/v1_11.rs b/meilitool/src/upgrade/v1_11.rs index 26c4234f6..4105879fd 100644 --- a/meilitool/src/upgrade/v1_11.rs +++ b/meilitool/src/upgrade/v1_11.rs @@ -57,6 +57,10 @@ pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { index_path.display() ) })?; + let index_read_database = + try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY) + .with_context(|| format!("while updating date format for index `{uid}`"))?; + let mut index_wtxn = index_env.write_txn().with_context(|| { format!( "while obtaining a write transaction for index {uid} at {}", @@ -64,10 +68,16 @@ pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { ) })?; - let database = try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY) - .with_context(|| format!("while updating date format for index `{uid}`"))?; + let index_write_database = + try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY) + .with_context(|| format!("while updating date format for index `{uid}`"))?; - arroy_v04_to_v05::ugrade_from_prev_version(&index_rtxn, &mut index_wtxn, database)?; + arroy_v04_to_v05::ugrade_from_prev_version( + &index_rtxn, + index_read_database, + &mut index_wtxn, + index_write_database, + )?; index_wtxn.commit()?; } From 5f57306858b86c4ca8755cffbb4e3d2dd36ffbfa Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 4 Nov 2024 11:46:36 +0100 Subject: [PATCH 80/92] update the arroy version in meilitool --- Cargo.lock | 4 ++-- crates/meilitool/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fd14a4a7d..04812fd1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -407,7 +407,7 @@ dependencies = [ [[package]] name = "arroy" version = "0.5.0" -source = 
"git+https://github.com/meilisearch/arroy/?rev=32670e7dd8b93640fcb53261ace89bda1c06497b#32670e7dd8b93640fcb53261ace89bda1c06497b" +source = "git+https://github.com/meilisearch/arroy/?rev=053807bf38dc079f25b003f19fc30fbf3613f6e7#053807bf38dc079f25b003f19fc30fbf3613f6e7" dependencies = [ "bytemuck", "byteorder", @@ -3536,7 +3536,7 @@ name = "meilitool" version = "1.11.0" dependencies = [ "anyhow", - "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?rev=32670e7dd8b93640fcb53261ace89bda1c06497b)", + "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?rev=053807bf38dc079f25b003f19fc30fbf3613f6e7)", "clap", "dump", "file-store", diff --git a/crates/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml index 693de6da8..f2c8920c9 100644 --- a/crates/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -18,4 +18,4 @@ meilisearch-types = { path = "../meilisearch-types" } serde = { version = "1.0.209", features = ["derive"] } time = { version = "0.3.36", features = ["formatting"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } -arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", rev = "32670e7dd8b93640fcb53261ace89bda1c06497b" } +arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", rev = "053807bf38dc079f25b003f19fc30fbf3613f6e7" } From 4eef0cd332168e60c38b9115560e1180d0a13d8e Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 4 Nov 2024 15:50:38 +0100 Subject: [PATCH 81/92] fix the update from v1_9 to v1_10 by providing a custom datetime formatter myself --- meilitool/src/upgrade/v1_10.rs | 19 +++++++++++++------ meilitool/src/upgrade/v1_9.rs | 12 +++++++++--- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/meilitool/src/upgrade/v1_10.rs b/meilitool/src/upgrade/v1_10.rs index 99fe104e3..671f4d6d2 100644 --- a/meilitool/src/upgrade/v1_10.rs +++ b/meilitool/src/upgrade/v1_10.rs @@ -58,8 +58,8 @@ impl From for IndexStats { database_size, used_database_size, 
field_distribution, - created_at, - updated_at, + created_at: created_at.0, + updated_at: updated_at.0, } } } @@ -76,6 +76,13 @@ fn update_index_stats( ) -> anyhow::Result<()> { let ctx = || format!("while updating index stats for index `{index_uid}`"); + let stats: Option<&str> = index_stats + .remap_data_type::() + .get(sched_wtxn, &index_uuid) + .with_context(ctx) + .with_context(|| "While reading value")?; + dbg!(stats); + let stats: Option = index_stats .remap_data_type::>() .get(sched_wtxn, &index_uuid) @@ -139,13 +146,13 @@ fn date_round_trip( key: &str, ) -> anyhow::Result<()> { let datetime = - db.remap_types::>().get(wtxn, key).with_context( - || format!("could not read `{key}` while updating date format for index `{index_uid}`"), - )?; + db.remap_types::>().get(wtxn, key).with_context(|| { + format!("could not read `{key}` while updating date format for index `{index_uid}`") + })?; if let Some(datetime) = datetime { db.remap_types::>() - .put(wtxn, key, &self::OffsetDateTime(datetime)) + .put(wtxn, key, &self::OffsetDateTime(datetime.0)) .with_context(|| { format!( "could not write `{key}` while updating date format for index `{index_uid}`" diff --git a/meilitool/src/upgrade/v1_9.rs b/meilitool/src/upgrade/v1_9.rs index faa2d9814..3e6cfde6c 100644 --- a/meilitool/src/upgrade/v1_9.rs +++ b/meilitool/src/upgrade/v1_9.rs @@ -1,4 +1,5 @@ use serde::{Deserialize, Serialize}; +use time::OffsetDateTime; pub type FieldDistribution = std::collections::BTreeMap; @@ -21,9 +22,9 @@ pub struct IndexStats { /// Association of every field name with the number of times it occurs in the documents. pub field_distribution: FieldDistribution, /// Creation date of the index. - pub created_at: time::OffsetDateTime, + pub created_at: LegacyTime, /// Date of the last update of the index. 
- pub updated_at: time::OffsetDateTime, + pub updated_at: LegacyTime, } #[derive(Debug, Deserialize, Serialize)] @@ -97,4 +98,9 @@ mod rest { } } -pub type OffsetDateTime = time::OffsetDateTime; +// 2024-11-04 13:32:08.48368 +00:00:00 +time::serde::format_description!(legacy_datetime, OffsetDateTime, "[year]-[month]-[day] [hour]:[minute]:[second].[subsecond] [offset_hour sign:mandatory]:[offset_minute]:[offset_second]"); + +#[derive(Debug, serde::Serialize, serde::Deserialize)] +#[serde(transparent)] +pub struct LegacyTime(#[serde(with = "legacy_datetime")] pub OffsetDateTime); From 106cc7fe3a8dd295b9230fd77c3a98c3d8f86ace Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 4 Nov 2024 17:51:40 +0100 Subject: [PATCH 82/92] fmt --- .../src/routes/indexes/search_analytics.rs | 20 +++++++++---------- .../src/routes/indexes/settings.rs | 2 +- .../src/routes/indexes/settings_analytics.rs | 7 ++++--- .../src/routes/indexes/similar_analytics.rs | 8 +++----- crates/meilisearch/src/routes/multi_search.rs | 3 +-- .../src/routes/multi_search_analytics.rs | 6 ++---- crates/meilisearch/tests/common/index.rs | 3 +-- 7 files changed, 21 insertions(+), 28 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/search_analytics.rs b/crates/meilisearch/src/routes/indexes/search_analytics.rs index 8bbb1781f..b16e2636e 100644 --- a/crates/meilisearch/src/routes/indexes/search_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/search_analytics.rs @@ -1,18 +1,16 @@ -use once_cell::sync::Lazy; -use regex::Regex; -use serde_json::{json, Value}; use std::collections::{BTreeSet, BinaryHeap, HashMap}; use meilisearch_types::locales::Locale; +use once_cell::sync::Lazy; +use regex::Regex; +use serde_json::{json, Value}; -use crate::{ - aggregate_methods, - analytics::{Aggregate, AggregateMethod}, - search::{ - SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, - DEFAULT_SEMANTIC_RATIO, - }, 
+use crate::aggregate_methods; +use crate::analytics::{Aggregate, AggregateMethod}; +use crate::search::{ + SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEMANTIC_RATIO, }; aggregate_methods!( diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index bca763a99..a9d8d3053 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -1,4 +1,3 @@ -use super::settings_analytics::*; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -11,6 +10,7 @@ use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked}; use meilisearch_types::tasks::KindWithContent; use tracing::debug; +use super::settings_analytics::*; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs index de01b72e8..32bddcbdd 100644 --- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -3,15 +3,16 @@ //! through the sub-settings route directly without any manipulation. //! This is why we often use a `Option<&Vec<_>>` instead of a `Option<&[_]>`. 
+use std::collections::{BTreeMap, BTreeSet, HashSet}; + +use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::vector::settings::EmbeddingSettings; use meilisearch_types::settings::{ - FacetingSettings, PaginationSettings, ProximityPrecisionView, TypoSettings, + FacetingSettings, PaginationSettings, ProximityPrecisionView, RankingRuleView, TypoSettings, }; -use meilisearch_types::{facet_values_sort::FacetValuesSort, settings::RankingRuleView}; use serde::Serialize; -use std::collections::{BTreeMap, BTreeSet, HashSet}; use crate::analytics::Aggregate; diff --git a/crates/meilisearch/src/routes/indexes/similar_analytics.rs b/crates/meilisearch/src/routes/indexes/similar_analytics.rs index 69685a56c..726839c3a 100644 --- a/crates/meilisearch/src/routes/indexes/similar_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/similar_analytics.rs @@ -4,11 +4,9 @@ use once_cell::sync::Lazy; use regex::Regex; use serde_json::{json, Value}; -use crate::{ - aggregate_methods, - analytics::{Aggregate, AggregateMethod}, - search::{SimilarQuery, SimilarResult}, -}; +use crate::aggregate_methods; +use crate::analytics::{Aggregate, AggregateMethod}; +use crate::search::{SimilarQuery, SimilarResult}; aggregate_methods!( SimilarPOST => "Similar POST", diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index b7bd31716..f8b1bc6ee 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -9,6 +9,7 @@ use meilisearch_types::keys::actions; use serde::Serialize; use tracing::debug; +use super::multi_search_analytics::MultiSearchAggregator; use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::ActionPolicy; @@ -21,8 +22,6 @@ use crate::search::{ }; use 
crate::search_queue::SearchQueue; -use super::multi_search_analytics::MultiSearchAggregator; - pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post)))); } diff --git a/crates/meilisearch/src/routes/multi_search_analytics.rs b/crates/meilisearch/src/routes/multi_search_analytics.rs index be1218399..3d07f471c 100644 --- a/crates/meilisearch/src/routes/multi_search_analytics.rs +++ b/crates/meilisearch/src/routes/multi_search_analytics.rs @@ -2,10 +2,8 @@ use std::collections::HashSet; use serde_json::json; -use crate::{ - analytics::Aggregate, - search::{FederatedSearch, SearchQueryWithIndex}, -}; +use crate::analytics::Aggregate; +use crate::search::{FederatedSearch, SearchQueryWithIndex}; #[derive(Default)] pub struct MultiSearchAggregator { diff --git a/crates/meilisearch/tests/common/index.rs b/crates/meilisearch/tests/common/index.rs index 784067c2d..221333fd7 100644 --- a/crates/meilisearch/tests/common/index.rs +++ b/crates/meilisearch/tests/common/index.rs @@ -9,8 +9,7 @@ use urlencoding::encode as urlencode; use super::encoder::Encoder; use super::service::Service; -use super::Value; -use super::{Owned, Shared}; +use super::{Owned, Shared, Value}; use crate::json; pub struct Index<'a, State = Owned> { From 99a9fde37f18b0498cdbc7b88a1510f8912d00b9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 4 Nov 2024 17:55:55 +0100 Subject: [PATCH 83/92] push back the removed files --- crates/meilitool/src/upgrade/mod.rs | 73 +++++++ crates/meilitool/src/upgrade/v1_10.rs | 289 ++++++++++++++++++++++++++ crates/meilitool/src/upgrade/v1_11.rs | 86 ++++++++ crates/meilitool/src/upgrade/v1_9.rs | 106 ++++++++++ 4 files changed, 554 insertions(+) create mode 100644 crates/meilitool/src/upgrade/mod.rs create mode 100644 crates/meilitool/src/upgrade/v1_10.rs create mode 100644 crates/meilitool/src/upgrade/v1_11.rs create mode 100644 crates/meilitool/src/upgrade/v1_9.rs diff --git 
a/crates/meilitool/src/upgrade/mod.rs b/crates/meilitool/src/upgrade/mod.rs new file mode 100644 index 000000000..ae095b6bd --- /dev/null +++ b/crates/meilitool/src/upgrade/mod.rs @@ -0,0 +1,73 @@ +mod v1_10; +mod v1_11; +mod v1_9; + +use std::path::{Path, PathBuf}; + +use anyhow::{bail, Context}; +use meilisearch_types::versioning::create_version_file; + +use v1_10::v1_9_to_v1_10; + +use crate::upgrade::v1_11::v1_10_to_v1_11; + +pub struct OfflineUpgrade { + pub db_path: PathBuf, + pub current_version: (String, String, String), + pub target_version: (String, String, String), +} + +impl OfflineUpgrade { + pub fn upgrade(self) -> anyhow::Result<()> { + let upgrade_list = [ + (v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"), + (v1_10_to_v1_11, "1", "11", "0"), + ]; + + let (current_major, current_minor, current_patch) = &self.current_version; + + let start_at = match ( + current_major.as_str(), + current_minor.as_str(), + current_patch.as_str(), + ) { + ("1", "9", _) => 0, + ("1", "10", _) => 1, + _ => { + bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9") + } + }; + + let (target_major, target_minor, target_patch) = &self.target_version; + + let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { + ("1", "10", _) => 0, + ("1", "11", _) => 1, + (major, _, _) if major.starts_with('v') => { + bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.") + } + _ => { + bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. 
Can only upgrade to v1.11") + } + }; + + println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); + + #[allow(clippy::needless_range_loop)] + for index in start_at..=ends_at { + let (func, major, minor, patch) = upgrade_list[index]; + (func)(&self.db_path)?; + println!("Done"); + // We're writing the version file just in case an issue arise _while_ upgrading. + // We don't want the DB to fail in an unknown state. + println!("Writing VERSION file"); + + create_version_file(&self.db_path, major, minor, patch) + .context("while writing VERSION file after the upgrade")?; + } + + println!("Success"); + + Ok(()) + } +} diff --git a/crates/meilitool/src/upgrade/v1_10.rs b/crates/meilitool/src/upgrade/v1_10.rs new file mode 100644 index 000000000..671f4d6d2 --- /dev/null +++ b/crates/meilitool/src/upgrade/v1_10.rs @@ -0,0 +1,289 @@ +use anyhow::bail; +use std::path::Path; + +use anyhow::Context; +use meilisearch_types::{ + heed::{ + types::{SerdeJson, Str}, + Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, + }, + milli::index::{db_name, main_key}, +}; + +use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; + +use super::v1_9; + +pub type FieldDistribution = std::collections::BTreeMap; + +/// The statistics that can be computed from an `Index` object. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. 
+ /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. + #[serde(with = "time::serde::rfc3339")] + pub created_at: time::OffsetDateTime, + /// Date of the last update of the index. + #[serde(with = "time::serde::rfc3339")] + pub updated_at: time::OffsetDateTime, +} + +impl From for IndexStats { + fn from( + v1_9::IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at, + updated_at, + }: v1_9::IndexStats, + ) -> Self { + IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at: created_at.0, + updated_at: updated_at.0, + } + } +} + +#[derive(serde::Serialize, serde::Deserialize)] +#[serde(transparent)] +pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime); + +fn update_index_stats( + index_stats: Database, + index_uid: &str, + index_uuid: uuid::Uuid, + sched_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let ctx = || format!("while updating index stats for index `{index_uid}`"); + + let stats: Option<&str> = index_stats + .remap_data_type::() + .get(sched_wtxn, &index_uuid) + .with_context(ctx) + .with_context(|| "While reading value")?; + dbg!(stats); + + let stats: Option = index_stats + .remap_data_type::>() + .get(sched_wtxn, &index_uuid) + .with_context(ctx) + .with_context(|| "While reading value")?; + + if let Some(stats) = stats { + let stats: self::IndexStats = stats.into(); + + index_stats + .remap_data_type::>() + .put(sched_wtxn, &index_uuid, &stats) + .with_context(ctx) + .with_context(|| "While writing value")?; + } + + Ok(()) +} + +fn update_date_format( + index_uid: &str, + index_env: &Env, + 
index_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) + .with_context(|| format!("while updating date format for index `{index_uid}`"))?; + + date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?; + date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?; + + Ok(()) +} + +fn find_rest_embedders( + index_uid: &str, + index_env: &Env, + index_txn: &RoTxn, +) -> anyhow::Result> { + let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) + .with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?; + + let mut rest_embedders = vec![]; + + for config in main + .remap_types::>>() + .get(index_txn, main_key::EMBEDDING_CONFIGS)? + .unwrap_or_default() + { + if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options { + rest_embedders.push(config.name); + } + } + + Ok(rest_embedders) +} + +fn date_round_trip( + wtxn: &mut RwTxn, + index_uid: &str, + db: Database, + key: &str, +) -> anyhow::Result<()> { + let datetime = + db.remap_types::>().get(wtxn, key).with_context(|| { + format!("could not read `{key}` while updating date format for index `{index_uid}`") + })?; + + if let Some(datetime) = datetime { + db.remap_types::>() + .put(wtxn, key, &self::OffsetDateTime(datetime.0)) + .with_context(|| { + format!( + "could not write `{key}` while updating date format for index `{index_uid}`" + ) + })?; + } + + Ok(()) +} + +pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> { + println!("Upgrading from v1.9.0 to v1.10.0"); + // 2 changes here + + // 1. date format. needs to be done before opening the Index + // 2. REST embedders. 
We don't support this case right now, so bail + + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + let mut sched_wtxn = env.write_txn()?; + + let index_mapping: Database = + try_opening_database(&env, &sched_wtxn, "index-mapping")?; + + let index_stats: Database = + try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| { + format!("While trying to open {:?}", index_scheduler_path.display()) + })?; + + let index_count = + index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?; + + // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn + // 1. immutably for the iteration + // 2. mutably for updating index stats + let indexes: Vec<_> = index_mapping + .iter(&sched_wtxn)? + .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) + .collect(); + + let mut rest_embedders = Vec::new(); + + let mut unwrapped_indexes = Vec::new(); + + // check that update can take place + for (index_index, result) in indexes.into_iter().enumerate() { + let (uid, uuid) = result?; + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? 
+ }; + + let index_txn = index_env.read_txn().with_context(|| { + format!( + "while obtaining a write transaction for index {uid} at {}", + index_path.display() + ) + })?; + + println!("\t- Checking for incompatible embedders (REST embedders)"); + let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?; + + if rest_embedders_for_index.is_empty() { + unwrapped_indexes.push((uid, uuid)); + } else { + // no need to add to unwrapped indexes because we'll exit early + rest_embedders.push((uid, rest_embedders_for_index)); + } + } + + if !rest_embedders.is_empty() { + let rest_embedders = rest_embedders + .into_iter() + .flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders)) + .map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`")) + .collect::>() + .join("\n"); + bail!("The update cannot take place because there are REST embedder(s). Remove them before proceeding with the update:\n{rest_embedders}\n\n\ + The database has not been modified and is still a valid v1.9 database."); + } + + println!("Update can take place, updating"); + + for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() { + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Updating index `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? 
+ }; + + let mut index_wtxn = index_env.write_txn().with_context(|| { + format!( + "while obtaining a write transaction for index `{uid}` at `{}`", + index_path.display() + ) + })?; + + println!("\t- Updating index stats"); + update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?; + println!("\t- Updating date format"); + update_date_format(&uid, &index_env, &mut index_wtxn)?; + + index_wtxn.commit().with_context(|| { + format!("while committing the write txn for index `{uid}` at {}", index_path.display()) + })?; + } + + sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?; + + println!("Upgrading database succeeded"); + + Ok(()) +} diff --git a/crates/meilitool/src/upgrade/v1_11.rs b/crates/meilitool/src/upgrade/v1_11.rs new file mode 100644 index 000000000..4105879fd --- /dev/null +++ b/crates/meilitool/src/upgrade/v1_11.rs @@ -0,0 +1,86 @@ +//! The breaking changes that happened between the v1.10 and the v1.11 are: +//! - Arroy went from the v0.4.0 to the v0.5.0, see this release note to get the whole context: https://github.com/meilisearch/arroy/releases/tag/v0.5.0 +//! - The `angular` distance has been renamed to `cosine` => We only need to update the string in the metadata. +//! - Reorganize the `NodeId` to make the appending of vectors work => We'll have to update the keys of almost all items in the DB. +//! - Store the list of updated IDs directly in LMDB instead of a roaring bitmap => This shouldn't be an issue since we are never supposed to commit this roaring bitmap, but it's not forbidden by arroy so ensuring it works is probably better than anything. 
+ +use std::path::Path; + +use anyhow::Context; +use meilisearch_types::{ + heed::{types::Str, Database, EnvOpenOptions}, + milli::index::db_name, +}; + +use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; + +pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { + println!("Upgrading from v1.10.0 to v1.11.0"); + + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + let sched_rtxn = env.read_txn()?; + + let index_mapping: Database = + try_opening_database(&env, &sched_rtxn, "index-mapping")?; + + let index_count = + index_mapping.len(&sched_rtxn).context("while reading the number of indexes")?; + + let indexes: Vec<_> = index_mapping + .iter(&sched_rtxn)? + .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) + .collect(); + + // check that update can take place + for (index_index, result) in indexes.into_iter().enumerate() { + let (uid, uuid) = result?; + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? 
+ }; + + let index_rtxn = index_env.read_txn().with_context(|| { + format!( + "while obtaining a read transaction for index {uid} at {}", + index_path.display() + ) + })?; + let index_read_database = + try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY) + .with_context(|| format!("while updating date format for index `{uid}`"))?; + + let mut index_wtxn = index_env.write_txn().with_context(|| { + format!( + "while obtaining a write transaction for index {uid} at {}", + index_path.display() + ) + })?; + + let index_write_database = + try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY) + .with_context(|| format!("while updating date format for index `{uid}`"))?; + + arroy_v04_to_v05::ugrade_from_prev_version( + &index_rtxn, + index_read_database, + &mut index_wtxn, + index_write_database, + )?; + + index_wtxn.commit()?; + } + + Ok(()) +} diff --git a/crates/meilitool/src/upgrade/v1_9.rs b/crates/meilitool/src/upgrade/v1_9.rs new file mode 100644 index 000000000..3e6cfde6c --- /dev/null +++ b/crates/meilitool/src/upgrade/v1_9.rs @@ -0,0 +1,106 @@ +use serde::{Deserialize, Serialize}; +use time::OffsetDateTime; + +pub type FieldDistribution = std::collections::BTreeMap; + +/// The statistics that can be computed from an `Index` object. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. 
+ /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. + pub created_at: LegacyTime, + /// Date of the last update of the index. + pub updated_at: LegacyTime, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct IndexEmbeddingConfig { + pub name: String, + pub config: EmbeddingConfig, +} + +#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] +pub struct EmbeddingConfig { + /// Options of the embedder, specific to each kind of embedder + pub embedder_options: EmbedderOptions, +} + +/// Options of an embedder, specific to each kind of embedder. +#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +pub enum EmbedderOptions { + HuggingFace(hf::EmbedderOptions), + OpenAi(openai::EmbedderOptions), + Ollama(ollama::EmbedderOptions), + UserProvided(manual::EmbedderOptions), + Rest(rest::EmbedderOptions), +} + +impl Default for EmbedderOptions { + fn default() -> Self { + Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None }) + } +} + +mod hf { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub model: String, + pub revision: Option, + } +} +mod openai { + + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub api_key: Option, + pub dimensions: Option, + } +} +mod ollama { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub embedding_model: String, + pub url: Option, + pub api_key: Option, + } +} +mod manual { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, 
serde::Serialize)] + pub struct EmbedderOptions { + pub dimensions: usize, + } +} +mod rest { + #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)] + pub struct EmbedderOptions { + pub api_key: Option, + pub dimensions: Option, + pub url: String, + pub input_field: Vec, + // path to the array of embeddings + pub path_to_embeddings: Vec, + // shape of a single embedding + pub embedding_object: Vec, + } +} + +// 2024-11-04 13:32:08.48368 +00:00:00 +time::serde::format_description!(legacy_datetime, OffsetDateTime, "[year]-[month]-[day] [hour]:[minute]:[second].[subsecond] [offset_hour sign:mandatory]:[offset_minute]:[offset_second]"); + +#[derive(Debug, serde::Serialize, serde::Deserialize)] +#[serde(transparent)] +pub struct LegacyTime(#[serde(with = "legacy_datetime")] pub OffsetDateTime); From a1f228f662f5fd76b15fab8acabcbf3b7f40080e Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 4 Nov 2024 18:19:36 +0100 Subject: [PATCH 84/92] remove the uneeded files after the rebase --- meilitool/src/upgrade/mod.rs | 73 --------- meilitool/src/upgrade/v1_10.rs | 289 --------------------------------- meilitool/src/upgrade/v1_11.rs | 86 ---------- meilitool/src/upgrade/v1_9.rs | 106 ------------ 4 files changed, 554 deletions(-) delete mode 100644 meilitool/src/upgrade/mod.rs delete mode 100644 meilitool/src/upgrade/v1_10.rs delete mode 100644 meilitool/src/upgrade/v1_11.rs delete mode 100644 meilitool/src/upgrade/v1_9.rs diff --git a/meilitool/src/upgrade/mod.rs b/meilitool/src/upgrade/mod.rs deleted file mode 100644 index ae095b6bd..000000000 --- a/meilitool/src/upgrade/mod.rs +++ /dev/null @@ -1,73 +0,0 @@ -mod v1_10; -mod v1_11; -mod v1_9; - -use std::path::{Path, PathBuf}; - -use anyhow::{bail, Context}; -use meilisearch_types::versioning::create_version_file; - -use v1_10::v1_9_to_v1_10; - -use crate::upgrade::v1_11::v1_10_to_v1_11; - -pub struct OfflineUpgrade { - pub db_path: PathBuf, - pub current_version: (String, String, String), - pub 
target_version: (String, String, String), -} - -impl OfflineUpgrade { - pub fn upgrade(self) -> anyhow::Result<()> { - let upgrade_list = [ - (v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"), - (v1_10_to_v1_11, "1", "11", "0"), - ]; - - let (current_major, current_minor, current_patch) = &self.current_version; - - let start_at = match ( - current_major.as_str(), - current_minor.as_str(), - current_patch.as_str(), - ) { - ("1", "9", _) => 0, - ("1", "10", _) => 1, - _ => { - bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9") - } - }; - - let (target_major, target_minor, target_patch) = &self.target_version; - - let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { - ("1", "10", _) => 0, - ("1", "11", _) => 1, - (major, _, _) if major.starts_with('v') => { - bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.") - } - _ => { - bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.11") - } - }; - - println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); - - #[allow(clippy::needless_range_loop)] - for index in start_at..=ends_at { - let (func, major, minor, patch) = upgrade_list[index]; - (func)(&self.db_path)?; - println!("Done"); - // We're writing the version file just in case an issue arise _while_ upgrading. - // We don't want the DB to fail in an unknown state. 
- println!("Writing VERSION file"); - - create_version_file(&self.db_path, major, minor, patch) - .context("while writing VERSION file after the upgrade")?; - } - - println!("Success"); - - Ok(()) - } -} diff --git a/meilitool/src/upgrade/v1_10.rs b/meilitool/src/upgrade/v1_10.rs deleted file mode 100644 index 671f4d6d2..000000000 --- a/meilitool/src/upgrade/v1_10.rs +++ /dev/null @@ -1,289 +0,0 @@ -use anyhow::bail; -use std::path::Path; - -use anyhow::Context; -use meilisearch_types::{ - heed::{ - types::{SerdeJson, Str}, - Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, - }, - milli::index::{db_name, main_key}, -}; - -use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; - -use super::v1_9; - -pub type FieldDistribution = std::collections::BTreeMap; - -/// The statistics that can be computed from an `Index` object. -#[derive(serde::Serialize, serde::Deserialize, Debug)] -pub struct IndexStats { - /// Number of documents in the index. - pub number_of_documents: u64, - /// Size taken up by the index' DB, in bytes. - /// - /// This includes the size taken by both the used and free pages of the DB, and as the free pages - /// are not returned to the disk after a deletion, this number is typically larger than - /// `used_database_size` that only includes the size of the used pages. - pub database_size: u64, - /// Size taken by the used pages of the index' DB, in bytes. - /// - /// As the DB backend does not return to the disk the pages that are not currently used by the DB, - /// this value is typically smaller than `database_size`. - pub used_database_size: u64, - /// Association of every field name with the number of times it occurs in the documents. - pub field_distribution: FieldDistribution, - /// Creation date of the index. - #[serde(with = "time::serde::rfc3339")] - pub created_at: time::OffsetDateTime, - /// Date of the last update of the index. 
- #[serde(with = "time::serde::rfc3339")] - pub updated_at: time::OffsetDateTime, -} - -impl From for IndexStats { - fn from( - v1_9::IndexStats { - number_of_documents, - database_size, - used_database_size, - field_distribution, - created_at, - updated_at, - }: v1_9::IndexStats, - ) -> Self { - IndexStats { - number_of_documents, - database_size, - used_database_size, - field_distribution, - created_at: created_at.0, - updated_at: updated_at.0, - } - } -} - -#[derive(serde::Serialize, serde::Deserialize)] -#[serde(transparent)] -pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime); - -fn update_index_stats( - index_stats: Database, - index_uid: &str, - index_uuid: uuid::Uuid, - sched_wtxn: &mut RwTxn, -) -> anyhow::Result<()> { - let ctx = || format!("while updating index stats for index `{index_uid}`"); - - let stats: Option<&str> = index_stats - .remap_data_type::() - .get(sched_wtxn, &index_uuid) - .with_context(ctx) - .with_context(|| "While reading value")?; - dbg!(stats); - - let stats: Option = index_stats - .remap_data_type::>() - .get(sched_wtxn, &index_uuid) - .with_context(ctx) - .with_context(|| "While reading value")?; - - if let Some(stats) = stats { - let stats: self::IndexStats = stats.into(); - - index_stats - .remap_data_type::>() - .put(sched_wtxn, &index_uuid, &stats) - .with_context(ctx) - .with_context(|| "While writing value")?; - } - - Ok(()) -} - -fn update_date_format( - index_uid: &str, - index_env: &Env, - index_wtxn: &mut RwTxn, -) -> anyhow::Result<()> { - let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) - .with_context(|| format!("while updating date format for index `{index_uid}`"))?; - - date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?; - date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?; - - Ok(()) -} - -fn find_rest_embedders( - index_uid: &str, - index_env: &Env, - index_txn: &RoTxn, -) -> anyhow::Result> { - let main = 
try_opening_poly_database(index_env, index_txn, db_name::MAIN) - .with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?; - - let mut rest_embedders = vec![]; - - for config in main - .remap_types::>>() - .get(index_txn, main_key::EMBEDDING_CONFIGS)? - .unwrap_or_default() - { - if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options { - rest_embedders.push(config.name); - } - } - - Ok(rest_embedders) -} - -fn date_round_trip( - wtxn: &mut RwTxn, - index_uid: &str, - db: Database, - key: &str, -) -> anyhow::Result<()> { - let datetime = - db.remap_types::>().get(wtxn, key).with_context(|| { - format!("could not read `{key}` while updating date format for index `{index_uid}`") - })?; - - if let Some(datetime) = datetime { - db.remap_types::>() - .put(wtxn, key, &self::OffsetDateTime(datetime.0)) - .with_context(|| { - format!( - "could not write `{key}` while updating date format for index `{index_uid}`" - ) - })?; - } - - Ok(()) -} - -pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> { - println!("Upgrading from v1.9.0 to v1.10.0"); - // 2 changes here - - // 1. date format. needs to be done before opening the Index - // 2. REST embedders. 
We don't support this case right now, so bail - - let index_scheduler_path = db_path.join("tasks"); - let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } - .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; - - let mut sched_wtxn = env.write_txn()?; - - let index_mapping: Database = - try_opening_database(&env, &sched_wtxn, "index-mapping")?; - - let index_stats: Database = - try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| { - format!("While trying to open {:?}", index_scheduler_path.display()) - })?; - - let index_count = - index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?; - - // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn - // 1. immutably for the iteration - // 2. mutably for updating index stats - let indexes: Vec<_> = index_mapping - .iter(&sched_wtxn)? - .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) - .collect(); - - let mut rest_embedders = Vec::new(); - - let mut unwrapped_indexes = Vec::new(); - - // check that update can take place - for (index_index, result) in indexes.into_iter().enumerate() { - let (uid, uuid) = result?; - let index_path = db_path.join("indexes").join(uuid.to_string()); - - println!( - "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", - index_index + 1, - index_path.display() - ); - - let index_env = unsafe { - // FIXME: fetch the 25 magic number from the index file - EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { - format!("while opening index {uid} at '{}'", index_path.display()) - })? 
- }; - - let index_txn = index_env.read_txn().with_context(|| { - format!( - "while obtaining a write transaction for index {uid} at {}", - index_path.display() - ) - })?; - - println!("\t- Checking for incompatible embedders (REST embedders)"); - let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?; - - if rest_embedders_for_index.is_empty() { - unwrapped_indexes.push((uid, uuid)); - } else { - // no need to add to unwrapped indexes because we'll exit early - rest_embedders.push((uid, rest_embedders_for_index)); - } - } - - if !rest_embedders.is_empty() { - let rest_embedders = rest_embedders - .into_iter() - .flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders)) - .map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`")) - .collect::>() - .join("\n"); - bail!("The update cannot take place because there are REST embedder(s). Remove them before proceeding with the update:\n{rest_embedders}\n\n\ - The database has not been modified and is still a valid v1.9 database."); - } - - println!("Update can take place, updating"); - - for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() { - let index_path = db_path.join("indexes").join(uuid.to_string()); - - println!( - "[{}/{index_count}]Updating index `{uid}` at `{}`", - index_index + 1, - index_path.display() - ); - - let index_env = unsafe { - // FIXME: fetch the 25 magic number from the index file - EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { - format!("while opening index {uid} at '{}'", index_path.display()) - })? 
- }; - - let mut index_wtxn = index_env.write_txn().with_context(|| { - format!( - "while obtaining a write transaction for index `{uid}` at `{}`", - index_path.display() - ) - })?; - - println!("\t- Updating index stats"); - update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?; - println!("\t- Updating date format"); - update_date_format(&uid, &index_env, &mut index_wtxn)?; - - index_wtxn.commit().with_context(|| { - format!("while committing the write txn for index `{uid}` at {}", index_path.display()) - })?; - } - - sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?; - - println!("Upgrading database succeeded"); - - Ok(()) -} diff --git a/meilitool/src/upgrade/v1_11.rs b/meilitool/src/upgrade/v1_11.rs deleted file mode 100644 index 4105879fd..000000000 --- a/meilitool/src/upgrade/v1_11.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! The breaking changes that happened between the v1.10 and the v1.11 are: -//! - Arroy went from the v0.4.0 to the v0.5.0, see this release note to get the whole context: https://github.com/meilisearch/arroy/releases/tag/v0.5.0 -//! - The `angular` distance has been renamed to `cosine` => We only need to update the string in the metadata. -//! - Reorganize the `NodeId` to make the appending of vectors work => We'll have to update the keys of almost all items in the DB. -//! - Store the list of updated IDs directly in LMDB instead of a roaring bitmap => This shouldn't be an issue since we are never supposed to commit this roaring bitmap, but it's not forbidden by arroy so ensuring it works is probably better than anything. 
- -use std::path::Path; - -use anyhow::Context; -use meilisearch_types::{ - heed::{types::Str, Database, EnvOpenOptions}, - milli::index::db_name, -}; - -use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; - -pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { - println!("Upgrading from v1.10.0 to v1.11.0"); - - let index_scheduler_path = db_path.join("tasks"); - let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } - .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; - - let sched_rtxn = env.read_txn()?; - - let index_mapping: Database = - try_opening_database(&env, &sched_rtxn, "index-mapping")?; - - let index_count = - index_mapping.len(&sched_rtxn).context("while reading the number of indexes")?; - - let indexes: Vec<_> = index_mapping - .iter(&sched_rtxn)? - .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) - .collect(); - - // check that update can take place - for (index_index, result) in indexes.into_iter().enumerate() { - let (uid, uuid) = result?; - let index_path = db_path.join("indexes").join(uuid.to_string()); - - println!( - "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", - index_index + 1, - index_path.display() - ); - - let index_env = unsafe { - EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { - format!("while opening index {uid} at '{}'", index_path.display()) - })? 
- }; - - let index_rtxn = index_env.read_txn().with_context(|| { - format!( - "while obtaining a read transaction for index {uid} at {}", - index_path.display() - ) - })?; - let index_read_database = - try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY) - .with_context(|| format!("while updating date format for index `{uid}`"))?; - - let mut index_wtxn = index_env.write_txn().with_context(|| { - format!( - "while obtaining a write transaction for index {uid} at {}", - index_path.display() - ) - })?; - - let index_write_database = - try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY) - .with_context(|| format!("while updating date format for index `{uid}`"))?; - - arroy_v04_to_v05::ugrade_from_prev_version( - &index_rtxn, - index_read_database, - &mut index_wtxn, - index_write_database, - )?; - - index_wtxn.commit()?; - } - - Ok(()) -} diff --git a/meilitool/src/upgrade/v1_9.rs b/meilitool/src/upgrade/v1_9.rs deleted file mode 100644 index 3e6cfde6c..000000000 --- a/meilitool/src/upgrade/v1_9.rs +++ /dev/null @@ -1,106 +0,0 @@ -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; - -pub type FieldDistribution = std::collections::BTreeMap; - -/// The statistics that can be computed from an `Index` object. -#[derive(serde::Serialize, serde::Deserialize, Debug)] -pub struct IndexStats { - /// Number of documents in the index. - pub number_of_documents: u64, - /// Size taken up by the index' DB, in bytes. - /// - /// This includes the size taken by both the used and free pages of the DB, and as the free pages - /// are not returned to the disk after a deletion, this number is typically larger than - /// `used_database_size` that only includes the size of the used pages. - pub database_size: u64, - /// Size taken by the used pages of the index' DB, in bytes. 
- /// - /// As the DB backend does not return to the disk the pages that are not currently used by the DB, - /// this value is typically smaller than `database_size`. - pub used_database_size: u64, - /// Association of every field name with the number of times it occurs in the documents. - pub field_distribution: FieldDistribution, - /// Creation date of the index. - pub created_at: LegacyTime, - /// Date of the last update of the index. - pub updated_at: LegacyTime, -} - -#[derive(Debug, Deserialize, Serialize)] -pub struct IndexEmbeddingConfig { - pub name: String, - pub config: EmbeddingConfig, -} - -#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] -pub struct EmbeddingConfig { - /// Options of the embedder, specific to each kind of embedder - pub embedder_options: EmbedderOptions, -} - -/// Options of an embedder, specific to each kind of embedder. -#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] -pub enum EmbedderOptions { - HuggingFace(hf::EmbedderOptions), - OpenAi(openai::EmbedderOptions), - Ollama(ollama::EmbedderOptions), - UserProvided(manual::EmbedderOptions), - Rest(rest::EmbedderOptions), -} - -impl Default for EmbedderOptions { - fn default() -> Self { - Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None }) - } -} - -mod hf { - #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] - pub struct EmbedderOptions { - pub model: String, - pub revision: Option, - } -} -mod openai { - - #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] - pub struct EmbedderOptions { - pub api_key: Option, - pub dimensions: Option, - } -} -mod ollama { - #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] - pub struct EmbedderOptions { - pub embedding_model: String, - pub url: Option, - pub api_key: Option, - } -} -mod manual { - #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, 
serde::Serialize)] - pub struct EmbedderOptions { - pub dimensions: usize, - } -} -mod rest { - #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)] - pub struct EmbedderOptions { - pub api_key: Option, - pub dimensions: Option, - pub url: String, - pub input_field: Vec, - // path to the array of embeddings - pub path_to_embeddings: Vec, - // shape of a single embedding - pub embedding_object: Vec, - } -} - -// 2024-11-04 13:32:08.48368 +00:00:00 -time::serde::format_description!(legacy_datetime, OffsetDateTime, "[year]-[month]-[day] [hour]:[minute]:[second].[subsecond] [offset_hour sign:mandatory]:[offset_minute]:[offset_second]"); - -#[derive(Debug, serde::Serialize, serde::Deserialize)] -#[serde(transparent)] -pub struct LegacyTime(#[serde(with = "legacy_datetime")] pub OffsetDateTime); From 48ab898ca2d8cd125458aac1ea500ecf324b7bc8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 5 Nov 2024 10:30:53 +0100 Subject: [PATCH 85/92] fix the datetime of v1.9 --- crates/meilitool/Cargo.toml | 2 +- crates/meilitool/src/upgrade/v1_10.rs | 6 +-- crates/meilitool/src/upgrade/v1_9.rs | 70 +++++++++++++++++++++++---- 3 files changed, 65 insertions(+), 13 deletions(-) diff --git a/crates/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml index f2c8920c9..353d44e9a 100644 --- a/crates/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -16,6 +16,6 @@ file-store = { path = "../file-store" } meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } serde = { version = "1.0.209", features = ["derive"] } -time = { version = "0.3.36", features = ["formatting"] } +time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", rev = "053807bf38dc079f25b003f19fc30fbf3613f6e7" } diff --git a/crates/meilitool/src/upgrade/v1_10.rs 
b/crates/meilitool/src/upgrade/v1_10.rs index 671f4d6d2..3dd7c72a2 100644 --- a/crates/meilitool/src/upgrade/v1_10.rs +++ b/crates/meilitool/src/upgrade/v1_10.rs @@ -146,9 +146,9 @@ fn date_round_trip( key: &str, ) -> anyhow::Result<()> { let datetime = - db.remap_types::>().get(wtxn, key).with_context(|| { - format!("could not read `{key}` while updating date format for index `{index_uid}`") - })?; + db.remap_types::>().get(wtxn, key).with_context( + || format!("could not read `{key}` while updating date format for index `{index_uid}`"), + )?; if let Some(datetime) = datetime { db.remap_types::>() diff --git a/crates/meilitool/src/upgrade/v1_9.rs b/crates/meilitool/src/upgrade/v1_9.rs index 3e6cfde6c..96cbfe68c 100644 --- a/crates/meilitool/src/upgrade/v1_9.rs +++ b/crates/meilitool/src/upgrade/v1_9.rs @@ -1,10 +1,10 @@ use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; +use time::{Date, OffsetDateTime, Time, UtcOffset}; pub type FieldDistribution = std::collections::BTreeMap; /// The statistics that can be computed from an `Index` object. -#[derive(serde::Serialize, serde::Deserialize, Debug)] +#[derive(serde::Deserialize, Debug)] pub struct IndexStats { /// Number of documents in the index. pub number_of_documents: u64, @@ -22,9 +22,9 @@ pub struct IndexStats { /// Association of every field name with the number of times it occurs in the documents. pub field_distribution: FieldDistribution, /// Creation date of the index. - pub created_at: LegacyTime, + pub created_at: LegacyDateTime, /// Date of the last update of the index. 
- pub updated_at: LegacyTime, + pub updated_at: LegacyDateTime, } #[derive(Debug, Deserialize, Serialize)] @@ -98,9 +98,61 @@ mod rest { } } -// 2024-11-04 13:32:08.48368 +00:00:00 -time::serde::format_description!(legacy_datetime, OffsetDateTime, "[year]-[month]-[day] [hour]:[minute]:[second].[subsecond] [offset_hour sign:mandatory]:[offset_minute]:[offset_second]"); +/// A datetime from Meilisearch v1.9 with an unspecified format. +#[derive(Debug)] +pub struct LegacyDateTime(pub OffsetDateTime); -#[derive(Debug, serde::Serialize, serde::Deserialize)] -#[serde(transparent)] -pub struct LegacyTime(#[serde(with = "legacy_datetime")] pub OffsetDateTime); +impl<'de> Deserialize<'de> for LegacyDateTime { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct Visitor; + impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = OffsetDateTime; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(formatter, "a valid datetime") + } + + // Comes from a binary. The legacy format is: + // 2024-11-04 13:32:08.48368 +00:00:00 + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + let format = time::macros::format_description!("[year]-[month]-[day] [hour]:[minute]:[second].[subsecond] [offset_hour sign:mandatory]:[offset_minute]:[offset_second]"); + OffsetDateTime::parse(v, format).map_err(E::custom) + } + + // Comes from the docker image, the legacy format is: + // [2024, 309, 17, 15, 1, 698184971, 0,0,0] + // year, day in year, hour, minute, sec, subsec , offset stuff + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let mut vec = Vec::new(); + // We must deserialize the value as `i64` because the largest values are `u32` and `i32` + while let Some(el) = seq.next_element::()? 
{ + vec.push(el); + } + if vec.len() != 9 { + return Err(serde::de::Error::custom(format!( + "Invalid datetime, received an array of {} elements instead of 9", + vec.len() + ))); + } + Ok(OffsetDateTime::new_in_offset( + Date::from_ordinal_date(vec[0] as i32, vec[1] as u16) + .map_err(serde::de::Error::custom)?, + Time::from_hms_nano(vec[2] as u8, vec[3] as u8, vec[4] as u8, vec[5] as u32) + .map_err(serde::de::Error::custom)?, + UtcOffset::from_hms(vec[6] as i8, vec[7] as i8, vec[8] as i8) + .map_err(serde::de::Error::custom)?, + )) + } + } + deserializer.deserialize_any(Visitor).map(LegacyDateTime) + } +} From 9799812b27b0fee47b969a1e3bdba771f29b93bc Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 5 Nov 2024 15:08:01 +0100 Subject: [PATCH 86/92] fix the benchmarks --- .github/workflows/benchmarks-manual.yml | 2 +- .github/workflows/benchmarks-pr.yml | 2 +- .github/workflows/benchmarks-push-indexing.yml | 2 +- .github/workflows/benchmarks-push-search-songs.yml | 2 +- .github/workflows/benchmarks-push-search-wiki.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmarks-manual.yml b/.github/workflows/benchmarks-manual.yml index da33bf803..14b77c83d 100644 --- a/.github/workflows/benchmarks-manual.yml +++ b/.github/workflows/benchmarks-manual.yml @@ -43,7 +43,7 @@ jobs: # Run benchmarks - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }} run: | - cd benchmarks + cd crates/benchmarks cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }} # Generate critcmp files diff --git a/.github/workflows/benchmarks-pr.yml b/.github/workflows/benchmarks-pr.yml index f9d609d6e..a083baa3c 100644 --- a/.github/workflows/benchmarks-pr.yml +++ b/.github/workflows/benchmarks-pr.yml @@ -88,7 +88,7 @@ jobs: # Run benchmarks - name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ 
steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }} run: | - cd benchmarks + cd crates/benchmarks cargo bench --bench ${{ steps.command.outputs.command-arguments }} -- --save-baseline ${{ steps.file.outputs.basename }} # Generate critcmp files diff --git a/.github/workflows/benchmarks-push-indexing.yml b/.github/workflows/benchmarks-push-indexing.yml index 1fdd5fd67..4495b4b9d 100644 --- a/.github/workflows/benchmarks-push-indexing.yml +++ b/.github/workflows/benchmarks-push-indexing.yml @@ -41,7 +41,7 @@ jobs: # Run benchmarks - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }} run: | - cd benchmarks + cd crates/benchmarks cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }} # Generate critcmp files diff --git a/.github/workflows/benchmarks-push-search-songs.yml b/.github/workflows/benchmarks-push-search-songs.yml index b6169ddf7..e9744a434 100644 --- a/.github/workflows/benchmarks-push-search-songs.yml +++ b/.github/workflows/benchmarks-push-search-songs.yml @@ -40,7 +40,7 @@ jobs: # Run benchmarks - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }} run: | - cd benchmarks + cd crates/benchmarks cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }} # Generate critcmp files diff --git a/.github/workflows/benchmarks-push-search-wiki.yml b/.github/workflows/benchmarks-push-search-wiki.yml index dd3146a14..bc9e1bcd0 100644 --- a/.github/workflows/benchmarks-push-search-wiki.yml +++ b/.github/workflows/benchmarks-push-search-wiki.yml @@ -40,7 +40,7 @@ jobs: # Run benchmarks - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }} run: | - cd benchmarks + cd crates/benchmarks cargo bench --bench 
${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }} # Generate critcmp files From f193c3a67c5d0a39d94e8437ef683aaa27b0e377 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 5 Nov 2024 15:13:32 +0100 Subject: [PATCH 87/92] Update crates/meilitool/src/main.rs Co-authored-by: Louis Dureuil --- crates/meilitool/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index ef137f746..978824356 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -73,7 +73,7 @@ enum Command { /// /// Supported upgrade paths: /// - /// - v1.9.0 -> v1.10.0 -> v1.11.0 + /// - v1.9.x -> v1.10.x -> v1.11.x OfflineUpgrade { #[arg(long)] target_version: String, From 66b7e0824efd310b335be45b12f461695f99e1b4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 5 Nov 2024 15:13:40 +0100 Subject: [PATCH 88/92] Update crates/meilitool/src/upgrade/mod.rs Co-authored-by: Louis Dureuil --- crates/meilitool/src/upgrade/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/meilitool/src/upgrade/mod.rs b/crates/meilitool/src/upgrade/mod.rs index ae095b6bd..0fd903ffe 100644 --- a/crates/meilitool/src/upgrade/mod.rs +++ b/crates/meilitool/src/upgrade/mod.rs @@ -34,7 +34,7 @@ impl OfflineUpgrade { ("1", "9", _) => 0, ("1", "10", _) => 1, _ => { - bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9") + bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. 
Can only upgrade from v1.9 and v1.10") } }; From e4993aa705a8e8a3a870a4616c845bfd143fd5f9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 5 Nov 2024 15:13:50 +0100 Subject: [PATCH 89/92] Update crates/meilitool/src/upgrade/mod.rs Co-authored-by: Louis Dureuil --- crates/meilitool/src/upgrade/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/meilitool/src/upgrade/mod.rs b/crates/meilitool/src/upgrade/mod.rs index 0fd903ffe..36630c3b3 100644 --- a/crates/meilitool/src/upgrade/mod.rs +++ b/crates/meilitool/src/upgrade/mod.rs @@ -47,7 +47,7 @@ impl OfflineUpgrade { bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.") } _ => { - bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.11") + bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10 and v1.11") } }; From 0f74a933467b0e372898975fa18a69cc3d1dd5b9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 5 Nov 2024 15:14:02 +0100 Subject: [PATCH 90/92] Update crates/meilitool/src/upgrade/v1_11.rs Co-authored-by: Louis Dureuil --- crates/meilitool/src/upgrade/v1_11.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/meilitool/src/upgrade/v1_11.rs b/crates/meilitool/src/upgrade/v1_11.rs index 4105879fd..de852f3dc 100644 --- a/crates/meilitool/src/upgrade/v1_11.rs +++ b/crates/meilitool/src/upgrade/v1_11.rs @@ -34,7 +34,6 @@ pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) .collect(); - // check that update can take place for (index_index, result) in indexes.into_iter().enumerate() { let (uid, uuid) = result?; let index_path = db_path.join("indexes").join(uuid.to_string()); From a5d138ac34448c7fc2410dee1e16ebca91b1a248 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 5 Nov 2024 15:23:27 +0100 Subject: [PATCH 91/92] use a tag while importing arroy instead of a loose branch 
or rev --- Cargo.lock | 4 ++-- crates/meilitool/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 04812fd1b..cef8e9c8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -407,7 +407,7 @@ dependencies = [ [[package]] name = "arroy" version = "0.5.0" -source = "git+https://github.com/meilisearch/arroy/?rev=053807bf38dc079f25b003f19fc30fbf3613f6e7#053807bf38dc079f25b003f19fc30fbf3613f6e7" +source = "git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05#053807bf38dc079f25b003f19fc30fbf3613f6e7" dependencies = [ "bytemuck", "byteorder", @@ -3536,7 +3536,7 @@ name = "meilitool" version = "1.11.0" dependencies = [ "anyhow", - "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?rev=053807bf38dc079f25b003f19fc30fbf3613f6e7)", + "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)", "clap", "dump", "file-store", diff --git a/crates/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml index 353d44e9a..048da6232 100644 --- a/crates/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -18,4 +18,4 @@ meilisearch-types = { path = "../meilisearch-types" } serde = { version = "1.0.209", features = ["derive"] } time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } -arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", rev = "053807bf38dc079f25b003f19fc30fbf3613f6e7" } +arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" } From 7415ef7ff5498bdc93ef835713f865df80c4b144 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 5 Nov 2024 15:37:59 +0100 Subject: [PATCH 92/92] Update crates/meilitool/src/upgrade/v1_11.rs Co-authored-by: Louis Dureuil --- crates/meilitool/src/upgrade/v1_11.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/crates/meilitool/src/upgrade/v1_11.rs b/crates/meilitool/src/upgrade/v1_11.rs index de852f3dc..0c84d3842 100644 --- a/crates/meilitool/src/upgrade/v1_11.rs +++ b/crates/meilitool/src/upgrade/v1_11.rs @@ -39,7 +39,7 @@ pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { let index_path = db_path.join("indexes").join(uuid.to_string()); println!( - "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", + "[{}/{index_count}]Updating embeddings for `{uid}` at `{}`", index_index + 1, index_path.display() );