Merge #4769

4769: Federated search r=ManyTheFish a=dureuill # Pull Request ## Related issue Fixes #4747 [Usage](https://meilisearch.notion.site/v1-10-federated-search-698dfe36ab6b4668b044f735fb40f0b2) ## What does this PR do? - multi-search now allows a top-level federation object. When not `null`, the results of multi-search are modified to be a single list of results rather than a list of lists of results - changed lifetimes around tokenizer et al. to be able to make hits one by one rather than using a vector - adds `roaring` to Meilisearch itself. As the federated search happens at the Meilisearch level (reuses the search functions declared at the Meilisearch level + merge happens after the hits were created), `RoaringBitmap`s are needed to track the candidates: hits that were seen, all candidates. - Refactor `make_hits` to allow for an individual, optimized `make_hit` - Score details comparison no longer fails when sorting on different field names or target point (for geo) Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-26 12:05:05 +08:00 · 2024-07-16 08:14:46 +00:00 · 2024-07-16 08:14:46 +00:00 · 1582c7e788
commit 1582c7e788
parent 77b9347fff 20094eba06
16 changed files with 5985 additions and 216 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3394,6 +3394,7 @@ dependencies = [
 "rayon",
 "regex",
 "reqwest",
 "roaring",
 "rustls 0.21.12",
 "rustls-pemfile 1.0.4",
 "segment",
--- a/meilisearch-types/src/deserr/mod.rs
+++ b/meilisearch-types/src/deserr/mod.rs
@ -192,6 +192,7 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
 merge_with_error_impl_take_error_message!(ParseTaskKindError);
 merge_with_error_impl_take_error_message!(ParseTaskStatusError);
 merge_with_error_impl_take_error_message!(IndexUidFormatError);
 merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight);
 merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
 merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
 merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@ -238,6 +238,11 @@ InvalidIndexLimit                     , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexOffset                    , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexPrimaryKey                , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexUid                       , InvalidRequest       , BAD_REQUEST ;
 InvalidMultiSearchFederated           , InvalidRequest       , BAD_REQUEST ;
 InvalidMultiSearchFederationOptions   , InvalidRequest       , BAD_REQUEST ;
 InvalidMultiSearchQueryPagination     , InvalidRequest       , BAD_REQUEST ;
 InvalidMultiSearchQueryRankingRules   , InvalidRequest       , BAD_REQUEST ;
 InvalidMultiSearchWeight              , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchAttributesToSearchOn     , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchAttributesToCrop         , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchAttributesToHighlight    , InvalidRequest       , BAD_REQUEST ;
@ -512,6 +517,12 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
    }
 }
 impl fmt::Display for deserr_codes::InvalidMultiSearchWeight {
    /// Writes the fixed, user-facing message reported when a `weight` is rejected.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // the message has no dynamic part, so it is written verbatim
        f.write_str("the value of `weight` is invalid, expected a positive float (>= 0.0).")
    }
 }
 impl fmt::Display for deserr_codes::InvalidSimilarId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@ -102,6 +102,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] }
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
 tracing-actix-web = "0.7.11"
 build-info = { version = "1.7.0", path = "../build-info" }
 roaring = "0.10.2"
 [dev-dependencies]
 actix-rt = "2.10.0"
--- a/meilisearch/src/analytics/mock_analytics.rs
+++ b/meilisearch/src/analytics/mock_analytics.rs
@ -42,7 +42,7 @@ pub struct MultiSearchAggregator;
 #[allow(dead_code)]
 impl MultiSearchAggregator {
-    pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
+    pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self {
        Self
    }
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@ -34,8 +34,8 @@ use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumen
 use crate::routes::indexes::facet_search::FacetSearchQuery;
 use crate::routes::{create_all_stats, Stats};
 use crate::search::{
-    FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
+    FacetSearchResult, FederatedSearch, MatchingStrategy, SearchQuery, SearchQueryWithIndex,
-    SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
+    SearchResult, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
    DEFAULT_SEMANTIC_RATIO,
 };
@ -1095,22 +1095,33 @@ pub struct MultiSearchAggregator {
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    // federation
    use_federation: bool,
    // context
    user_agents: HashSet<String>,
 }
 impl MultiSearchAggregator {
-    pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self {
+    pub fn from_federated_search(
        federated_search: &FederatedSearch,
        request: &HttpRequest,
    ) -> Self {
        let timestamp = Some(OffsetDateTime::now_utc());
        let user_agents = extract_user_agents(request).into_iter().collect();
-        let distinct_indexes: HashSet<_> = query
+        let use_federation = federated_search.federation.is_some();
        let distinct_indexes: HashSet<_> = federated_search
            .queries
            .iter()
            .map(|query| {
                let query = &query;
                // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
                let SearchQueryWithIndex {
                    index_uid,
                    federation_options: _,
                    q: _,
                    vector: _,
                    offset: _,
@ -1142,8 +1153,10 @@ impl MultiSearchAggregator {
            })
            .collect();
-        let show_ranking_score = query.iter().any(|query| query.show_ranking_score);
+        let show_ranking_score =
-        let show_ranking_score_details = query.iter().any(|query| query.show_ranking_score_details);
+            federated_search.queries.iter().any(|query| query.show_ranking_score);
        let show_ranking_score_details =
            federated_search.queries.iter().any(|query| query.show_ranking_score_details);
        Self {
            timestamp,
@ -1151,10 +1164,11 @@ impl MultiSearchAggregator {
            total_succeeded: 0,
            total_distinct_index_count: distinct_indexes.len(),
            total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
-            total_search_count: query.len(),
+            total_search_count: federated_search.queries.len(),
            show_ranking_score,
            show_ranking_score_details,
            user_agents,
            use_federation,
        }
    }
@ -1180,6 +1194,7 @@ impl MultiSearchAggregator {
        let show_ranking_score_details =
            this.show_ranking_score_details || other.show_ranking_score_details;
        let mut user_agents = this.user_agents;
        let use_federation = this.use_federation || other.use_federation;
        for user_agent in other.user_agents.into_iter() {
            user_agents.insert(user_agent);
@ -1196,6 +1211,7 @@ impl MultiSearchAggregator {
            user_agents,
            show_ranking_score,
            show_ranking_score_details,
            use_federation,
            // do not add _ or ..Default::default() here
        };
@ -1214,6 +1230,7 @@ impl MultiSearchAggregator {
            user_agents,
            show_ranking_score,
            show_ranking_score_details,
            use_federation,
        } = self;
        if total_received == 0 {
@ -1238,6 +1255,9 @@ impl MultiSearchAggregator {
                "scoring": {
                    "show_ranking_score": show_ranking_score,
                    "show_ranking_score_details": show_ranking_score_details,
                },
                "federation": {
                    "use_federation": use_federation,
                }
            });
--- a/meilisearch/src/error.rs
+++ b/meilisearch/src/error.rs
@ -25,6 +25,10 @@ pub enum MeilisearchHttpError {
    DocumentNotFound(String),
    #[error("Sending an empty filter is forbidden.")]
    EmptyFilter,
    #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")]
    FederationOptionsInNonFederatedRequest(usize),
    #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")]
    PaginationInFederatedQuery(usize, &'static str),
    #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
    InvalidExpression(&'static [&'static str], Value),
    #[error("A {0} payload is missing.")]
@ -86,6 +90,12 @@ impl ErrorCode for MeilisearchHttpError {
            MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
            MeilisearchHttpError::Join(_) => Code::Internal,
            MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
            MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
                Code::InvalidMultiSearchFederationOptions
            }
            MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
                Code::InvalidMultiSearchQueryPagination
            }
        }
    }
 }
--- a/meilisearch/src/routes/multi_search.rs
+++ b/meilisearch/src/routes/multi_search.rs
@ -10,12 +10,14 @@ use serde::Serialize;
 use tracing::debug;
 use crate::analytics::{Analytics, MultiSearchAggregator};
 use crate::error::MeilisearchHttpError;
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::indexes::search::search_kind;
 use crate::search::{
-    add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
+    add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors,
    SearchQueryWithIndex, SearchResultWithIndex,
 };
 use crate::search_queue::SearchQueue;
@ -28,46 +30,86 @@ struct SearchResults {
    results: Vec<SearchResultWithIndex>,
 }
 #[derive(Debug, deserr::Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct SearchQueries {
    queries: Vec<SearchQueryWithIndex>,
 }
 pub async fn multi_search_with_post(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    search_queue: Data<SearchQueue>,
-    params: AwebJson<SearchQueries, DeserrJsonError>,
+    params: AwebJson<FederatedSearch, DeserrJsonError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
    let queries = params.into_inner().queries;
    let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
    let features = index_scheduler.features();
    // Since we don't want to process half of the search requests and then get a permit refused
    // we're going to get one permit for the whole duration of the multi-search request.
    let _permit = search_queue.try_get_search_permit().await?;
    let federated_search = params.into_inner();
    let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req);
    let FederatedSearch { mut queries, federation } = federated_search;
    let features = index_scheduler.features();
    // regardless of federation, check authorization and apply search rules
    let auth = 'check_authorization: {
        for (query_index, federated_query) in queries.iter_mut().enumerate() {
            let index_uid = federated_query.index_uid.as_str();
            // Check index from API key
            if !index_scheduler.filters().is_index_authorized(index_uid) {
                break 'check_authorization Err(AuthenticationError::InvalidToken)
                    .with_index(query_index);
            }
            // Apply search rules from tenant token
            if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(index_uid)
            {
                add_search_rules(&mut federated_query.filter, search_rules);
            }
        }
        Ok(())
    };
    auth.map_err(|(mut err, query_index)| {
        // Add the query index that failed as context for the error message.
        // We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
        // of result and we can benefit from static typing.
        err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
        err
    })?;
    let response = match federation {
        Some(federation) => {
            let search_result = tokio::task::spawn_blocking(move || {
                perform_federated_search(&index_scheduler, queries, federation, features)
            })
            .await;
            if let Ok(Ok(_)) = search_result {
                multi_aggregate.succeed();
            }
            analytics.post_multi_search(multi_aggregate);
            HttpResponse::Ok().json(search_result??)
        }
        None => {
            // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
            // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
            // changes.
            let search_results: Result<_, (ResponseError, usize)> = async {
                let mut search_results = Vec::with_capacity(queries.len());
-        for (query_index, (index_uid, mut query)) in
+                for (query_index, (index_uid, query, federation_options)) in queries
-            queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
+                    .into_iter()
                    .map(SearchQueryWithIndex::into_index_query_federation)
                    .enumerate()
                {
                    debug!(on_index = query_index, parameters = ?query, "Multi-search");
-            // Check index from API key
+                    if federation_options.is_some() {
-            if !index_scheduler.filters().is_index_authorized(&index_uid) {
+                        return Err((
-                return Err(AuthenticationError::InvalidToken).with_index(query_index);
+                            MeilisearchHttpError::FederationOptionsInNonFederatedRequest(
-            }
+                                query_index,
-            // Apply search rules from tenant token
+                            )
-            if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
+                            .into(),
-            {
+                            query_index,
-                add_search_rules(&mut query.filter, search_rules);
+                        ));
                    }
                    let index = index_scheduler
@ -81,10 +123,11 @@ pub async fn multi_search_with_post(
                        })
                        .with_index(query_index)?;
-            let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)
+                    let search_kind =
                        search_kind(&query, index_scheduler.get_ref(), &index, features)
                            .with_index(query_index)?;
                    let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)
                        .with_index(query_index)?;
            let retrieve_vector =
                RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?;
                    let search_result = tokio::task::spawn_blocking(move || {
                        perform_search(&index, query, search_kind, retrieve_vector)
@ -116,7 +159,11 @@ pub async fn multi_search_with_post(
            debug!(returns = ?search_results, "Multi-search");
-    Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
+            HttpResponse::Ok().json(SearchResults { results: search_results })
        }
    };
    Ok(response)
 }
 /// Local `Result` extension trait to avoid `map_err` boilerplate.
--- a/meilisearch/src/search/federated.rs
+++ b/meilisearch/src/search/federated.rs
@ -0,0 +1,629 @@
 use std::cmp::Ordering;
 use std::collections::BTreeMap;
 use std::fmt;
 use std::iter::Zip;
 use std::rc::Rc;
 use std::str::FromStr as _;
 use std::time::Duration;
 use std::vec::{IntoIter, Vec};
 use actix_http::StatusCode;
 use index_scheduler::{IndexScheduler, RoFeatures};
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::{
    InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
 };
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
 use meilisearch_types::milli::{self, DocumentId, TimeBudget};
 use roaring::RoaringBitmap;
 use serde::Serialize;
 use super::ranking_rules::{self, RankingRules};
 use super::{
    prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind,
    SearchQuery, SearchQueryWithIndex,
 };
 use crate::error::MeilisearchHttpError;
 use crate::routes::indexes::search::search_kind;
 /// Value wrapped by `Weight::default()`, i.e. the weight of a query that does not specify one.
 pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
 /// Per-query options of a federated search (the `federationOptions` object of a query).
 ///
 /// Deserialized with camelCase keys; unknown fields are rejected.
 #[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct FederationOptions {
    /// Multiplier applied to the scores of this query's hits when results are merged.
    /// Falls back to `Weight::default()` when absent.
    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
    pub weight: Weight,
 }
 /// Weight of a query in a federated search.
 ///
 /// Deserialized from an `f64` through `TryFrom<f64>`, which rejects negative values with
 /// `InvalidMultiSearchWeight`. The wrapped float is read through `Deref`.
 #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
 #[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
 pub struct Weight(f64);
 impl Default for Weight {
    fn default() -> Self {
        Weight(DEFAULT_FEDERATED_WEIGHT)
    }
 }
 impl std::convert::TryFrom<f64> for Weight {
    type Error = InvalidMultiSearchWeight;
    /// Builds a `Weight` from a raw float.
    ///
    /// # Errors
    ///
    /// Returns `InvalidMultiSearchWeight` when `f` is negative or NaN.
    fn try_from(f: f64) -> Result<Self, Self::Error> {
        // `f >= 0.0` is false for NaN, whereas the previous `f < 0.0` check let NaN through.
        // A NaN weight turns every weighted score into NaN, which makes the
        // `partial_cmp(..).unwrap()` calls used to compare weighted scores panic.
        if f >= 0.0 {
            Ok(Weight(f))
        } else {
            Err(InvalidMultiSearchWeight)
        }
    }
 }
 impl std::ops::Deref for Weight {
    type Target = f64;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
 }
 /// Top-level `federation` object of a multi-search request.
 ///
 /// Deserialized with camelCase keys; unknown fields are rejected.
 #[derive(Debug, deserr::Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct Federation {
    /// Defaults to `super::DEFAULT_SEARCH_LIMIT()` when absent.
    #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
    pub limit: usize,
    /// Defaults to `super::DEFAULT_SEARCH_OFFSET()` when absent.
    #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
    pub offset: usize,
 }
 /// Payload of a multi-search request, federated or not.
 ///
 /// Deserialized with camelCase keys; unknown fields are rejected.
 #[derive(Debug, deserr::Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct FederatedSearch {
    /// The individual queries, each one targeting an index.
    pub queries: Vec<SearchQueryWithIndex>,
    /// `None` when the field is absent from the request.
    #[deserr(default)]
    pub federation: Option<Federation>,
 }
 /// Result returned by `perform_federated_search`, serialized with camelCase keys.
 #[derive(Serialize, Clone, PartialEq)]
 #[serde(rename_all = "camelCase")]
 pub struct FederatedSearchResult {
    pub hits: Vec<SearchHit>,
    pub processing_time_ms: u128,
    // the fields of `HitsInfo` are inlined in the serialized object rather than nested
    #[serde(flatten)]
    pub hits_info: HitsInfo,
    // omitted from the serialized object when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub semantic_hit_count: Option<u32>,
    // These fields are only used for analytics purposes
    #[serde(skip)]
    pub degraded: bool,
    #[serde(skip)]
    pub used_negative_operator: bool,
 }
 impl fmt::Debug for FederatedSearchResult {
    /// Compact debug representation of a federated search result.
    ///
    /// The hits are summarized by their count instead of being printed, and the
    /// `used_negative_operator`, `degraded` and `semantic_hit_count` fields only appear when
    /// they are respectively `true`, `true` and `Some`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // exhaustive destructuring: adding a field to the struct forces this impl to be updated
        let FederatedSearchResult {
            hits,
            processing_time_ms,
            hits_info,
            semantic_hit_count,
            degraded,
            used_negative_operator,
        } = self;
        // Use the name of the actual type: the previous "SearchResult" label made federated
        // results indistinguishable from regular search results in debug output.
        let mut debug = f.debug_struct("FederatedSearchResult");
        // The most important thing when looking at a search result is the time it took to process
        debug.field("processing_time_ms", &processing_time_ms);
        debug.field("hits", &format!("[{} hits returned]", hits.len()));
        debug.field("hits_info", &hits_info);
        if *used_negative_operator {
            debug.field("used_negative_operator", used_negative_operator);
        }
        if *degraded {
            debug.field("degraded", degraded);
        }
        if let Some(semantic_hit_count) = semantic_hit_count {
            debug.field("semantic_hit_count", &semantic_hit_count);
        }
        debug.finish()
    }
 }
 /// Borrowed score details of a hit, paired with the weight of the query that produced it.
 struct WeightedScore<'a> {
    details: &'a [ScoreDetails],
    weight: f64,
 }
 impl<'a> WeightedScore<'a> {
    /// Pairs the score details of a hit with the weight to apply to them.
    pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self {
        Self { details, weight }
    }
    /// Global score computed from all the details, multiplied by the weight.
    pub fn weighted_global_score(&self) -> f64 {
        ScoreDetails::global_score(self.details.iter()) * self.weight
    }
    /// Orders `self` and `other` by their weighted global scores.
    ///
    /// # Panics
    ///
    /// Panics if either weighted global score is NaN, as `partial_cmp` then returns `None`.
    pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering {
        self.weighted_global_score()
            .partial_cmp(&other.weighted_global_score())
            // both are numbers, possibly infinite
            .unwrap()
    }
    /// Orders `self` and `other` by walking their score values pairwise, in order.
    ///
    /// - When one side runs out of values first, it is the lesser one.
    /// - Two `Score` values are weighted before being compared, and are considered tied when
    ///   they differ by at most `f64::EPSILON`.
    /// - Two `Sort` values, or two `GeoSort` values, are compared with `partial_cmp`; when they
    ///   are not comparable, the weighted global scores decide.
    /// - Values of different kinds are never compared with each other: the side with the most
    ///   remaining values wins, and the weighted global scores break the tie.
    ///
    /// A tie on a pair moves the comparison on to the next pair.
    ///
    /// # Panics
    ///
    /// Panics if a weighted score is NaN (see `compare_weighted_global_scores`).
    pub fn compare(&self, other: &Self) -> Ordering {
        let mut left_it = ScoreDetails::score_values(self.details.iter());
        let mut right_it = ScoreDetails::score_values(other.details.iter());
        loop {
            let left = left_it.next();
            let right = right_it.next();
            match (left, right) {
                (None, None) => return Ordering::Equal,
                (None, Some(_)) => return Ordering::Less,
                (Some(_), None) => return Ordering::Greater,
                (Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
                    let left = left * self.weight;
                    let right = right * other.weight;
                    // nearly identical weighted scores are a tie: look at the next pair
                    if (left - right).abs() <= f64::EPSILON {
                        continue;
                    }
                    return left.partial_cmp(&right).unwrap();
                }
                (Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
                    match left.partial_cmp(right) {
                        Some(Ordering::Equal) => continue,
                        Some(order) => return order,
                        None => return self.compare_weighted_global_scores(other),
                    }
                }
                (Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
                    match left.partial_cmp(right) {
                        Some(Ordering::Equal) => continue,
                        Some(order) => return order,
                        None => {
                            return self.compare_weighted_global_scores(other);
                        }
                    }
                }
                // not comparable details, use global
                (Some(ScoreValue::Score(_)), Some(_))
                | (Some(_), Some(ScoreValue::Score(_)))
                | (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
                | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
                    // `count` consumes the iterators: only the values that come after the
                    // mismatching pair are counted
                    let left_count = left_it.count();
                    let right_count = right_it.count();
                    // compare how many remaining groups of rules each side has.
                    // the group with the most remaining groups wins.
                    return left_count
                        .cmp(&right_count)
                        // breaks ties with the global ranking score
                        .then_with(|| self.compare_weighted_global_scores(other));
                }
            }
        }
    }
 }
 /// A query of the request once grouped under the index it targets.
 struct QueryByIndex {
    query: SearchQuery,
    federation_options: FederationOptions,
    // position of the query in the `queries` array of the request
    query_index: usize,
 }
 /// Raw results of a single query, before any hit is built.
 struct SearchResultByQuery<'a> {
    // `documents_ids` and `document_scores` are zipped together by `SearchResultByQueryIter::new`
    documents_ids: Vec<DocumentId>,
    document_scores: Vec<Vec<ScoreDetails>>,
    federation_options: FederationOptions,
    hit_maker: HitMaker<'a>,
    // position of the query in the `queries` array of the request
    query_index: usize,
 }
 /// Iterator over the results of a single query, yielding one `SearchResultByQueryIterItem`
 /// per (document id, score details) pair.
 struct SearchResultByQueryIter<'a> {
    it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
    federation_options: FederationOptions,
    // reference-counted so that every yielded item can hold a handle to the same hit maker
    hit_maker: Rc<HitMaker<'a>>,
    query_index: usize,
 }
 impl<'a> SearchResultByQueryIter<'a> {
    /// Turns the results of a query into an iterator over its (document id, score) pairs.
    ///
    /// The hit maker is moved behind an `Rc` so that it can be shared with the yielded items.
    fn new(result: SearchResultByQuery<'a>) -> Self {
        // exhaustive destructuring so that a new field cannot be silently ignored
        let SearchResultByQuery {
            documents_ids,
            document_scores,
            federation_options,
            hit_maker,
            query_index,
        } = result;
        Self {
            it: documents_ids.into_iter().zip(document_scores),
            federation_options,
            hit_maker: Rc::new(hit_maker),
            query_index,
        }
    }
 }
 /// A single document yielded by `SearchResultByQueryIter`, carrying everything needed to rank it
 /// against documents from other queries.
 struct SearchResultByQueryIterItem<'a> {
    docid: DocumentId,
    score: Vec<ScoreDetails>,
    federation_options: FederationOptions,
    // handle to the hit maker shared by all the items of the same query
    hit_maker: Rc<HitMaker<'a>>,
    query_index: usize,
 }
 /// Merges the results of all the queries made on the same index into a single iterator.
 ///
 /// An item is yielded before another one when its weighted score is greater; on equal scores,
 /// the item of the query that appears first in the request goes first.
 fn merge_index_local_results(
    results_by_query: Vec<SearchResultByQuery<'_>>,
 ) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
    let per_query_results = results_by_query.into_iter().map(SearchResultByQueryIter::new);
    itertools::kmerge_by(
        per_query_results,
        // returns `true` when `left` must be yielded before `right`
        |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
            let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
            let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
            match left_score.compare(&right_score) {
                // break ties using query index
                Ordering::Equal => left.query_index < right.query_index,
                // otherwise, the biggest score goes first
                ordering => ordering == Ordering::Greater,
            }
        },
    )
 }
 /// Merges the hits of all the indexes into a single iterator.
 ///
 /// A hit is yielded before another one when its weighted score is greater; on equal scores,
 /// the hit of the query that appears first in the request goes first.
 fn merge_index_global_results(
    results_by_index: Vec<SearchResultByIndex>,
 ) -> impl Iterator<Item = SearchHitByIndex> {
    let per_index_hits = results_by_index.into_iter().map(|by_index| by_index.hits.into_iter());
    itertools::kmerge_by(
        per_index_hits,
        // returns `true` when `left` must be yielded before `right`
        |left: &SearchHitByIndex, right: &SearchHitByIndex| {
            let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
            let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
            match left_score.compare(&right_score) {
                // break ties using query index
                Ordering::Equal => left.query_index < right.query_index,
                // otherwise, the biggest score goes first
                ordering => ordering == Ordering::Greater,
            }
        },
    )
 }
 impl<'a> Iterator for SearchResultByQueryIter<'a> {
    type Item = SearchResultByQueryIterItem<'a>;
    /// Yields the next (document id, score) pair, decorated with the data of the query:
    /// its federation options, its index in the request and a handle to its hit maker.
    fn next(&mut self) -> Option<Self::Item> {
        self.it.next().map(|(docid, score)| SearchResultByQueryIterItem {
            docid,
            score,
            federation_options: self.federation_options,
            // cheap: only bumps the reference count
            hit_maker: Rc::clone(&self.hit_maker),
            query_index: self.query_index,
        })
    }
 }
 /// A fully built hit, kept together with the data `merge_index_global_results` needs to rank it
 /// against the hits of the other indexes.
 struct SearchHitByIndex {
    hit: SearchHit,
    score: Vec<ScoreDetails>,
    federation_options: FederationOptions,
    query_index: usize,
 }
 /// Results gathered for a single index.
 struct SearchResultByIndex {
    // consumed in order by `merge_index_global_results`
    // NOTE(review): presumably already sorted by decreasing weighted score — confirm
    hits: Vec<SearchHitByIndex>,
    candidates: RoaringBitmap,
    degraded: bool,
    used_negative_operator: bool,
 }
 pub fn perform_federated_search(
    index_scheduler: &IndexScheduler,
    queries: Vec<SearchQueryWithIndex>,
    federation: Federation,
    features: RoFeatures,
 ) -> Result<FederatedSearchResult, ResponseError> {
    let before_search = std::time::Instant::now();
    // this implementation partitions the queries by index to guarantee an important property:
    // - all the queries to a particular index use the same read transaction.
    // This is an important property, otherwise we cannot guarantee the self-consistency of the results.
    // 1. partition queries by index
    let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default();
    for (query_index, federated_query) in queries.into_iter().enumerate() {
        if let Some(pagination_field) = federated_query.has_pagination() {
            return Err(MeilisearchHttpError::PaginationInFederatedQuery(
                query_index,
                pagination_field,
            )
            .into());
        }
        let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
        queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
            query,
            federation_options: federation_options.unwrap_or_default(),
            query_index,
        })
    }
    // 2. perform queries, merge and make hits index by index
    let required_hit_count = federation.limit + federation.offset;
    // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
    // Then in step (3), we'll update its value if there is any semantic search
    let mut semantic_hit_count = None;
    let mut results_by_index = Vec::with_capacity(queries_by_index.len());
    let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
    for (index_uid, queries) in queries_by_index {
        let index = match index_scheduler.index(&index_uid) {
            Ok(index) => index,
            Err(err) => {
                let mut err = ResponseError::from(err);
                // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
                // here the resource not found is not part of the URL.
                err.code = StatusCode::BAD_REQUEST;
                if let Some(query) = queries.first() {
                    err.message =
                        format!("Inside `.queries[{}]`: {}", query.query_index, err.message);
                }
                return Err(err);
            }
        };
        // Important: this is the only transaction we'll use for this index during this federated search
        let rtxn = index.read_txn()?;
        let criteria = index.criteria(&rtxn)?;
        // stuff we need for the hitmaker
        let script_lang_map = index.script_language(&rtxn)?;
        let dictionary = index.dictionary(&rtxn)?;
        let dictionary: Option<Vec<_>> =
            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
        let separators = index.allowed_separators(&rtxn)?;
        let separators: Option<Vec<_>> =
            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
        // each query gets its individual cutoff
        let cutoff = index.search_cutoff(&rtxn)?;
        let mut degraded = false;
        let mut used_negative_operator = false;
        let mut candidates = RoaringBitmap::new();
        // 2.1. Compute all candidates for each query in the index
        let mut results_by_query = Vec::with_capacity(queries.len());
        for QueryByIndex { query, federation_options, query_index } in queries {
            // use an immediately invoked lambda to capture the result without returning from the function
            let res: Result<(), ResponseError> = (|| {
                let search_kind = search_kind(&query, index_scheduler, &index, features)?;
                let canonicalization_kind = match (&search_kind, &query.q) {
                    (SearchKind::SemanticOnly { .. }, _) => {
                        ranking_rules::CanonicalizationKind::Vector
                    }
                    (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword,
                    _ => ranking_rules::CanonicalizationKind::Placeholder,
                };
                let sort = if let Some(sort) = &query.sort {
                    let sorts: Vec<_> =
                        match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
                            Ok(sorts) => sorts,
                            Err(asc_desc_error) => {
                                return Err(milli::Error::from(milli::SortError::from(
                                    asc_desc_error,
                                ))
                                .into())
                            }
                        };
                    Some(sorts)
                } else {
                    None
                };
                let ranking_rules = ranking_rules::RankingRules::new(
                    criteria.clone(),
                    sort,
                    query.matching_strategy.into(),
                    canonicalization_kind,
                );
                if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) =
                    previous_query_data.take()
                {
                    if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) {
                        return Err(error.to_response_error(
                            &ranking_rules,
                            &previous_ranking_rules,
                            query_index,
                            previous_query_index,
                            &index_uid,
                            &previous_index_uid,
                        ));
                    }
                    previous_query_data = if previous_ranking_rules.constraint_count()
                        > ranking_rules.constraint_count()
                    {
                        Some((previous_ranking_rules, previous_query_index, previous_index_uid))
                    } else {
                        Some((ranking_rules, query_index, index_uid.clone()))
                    };
                } else {
                    previous_query_data = Some((ranking_rules, query_index, index_uid.clone()));
                }
                match search_kind {
                    SearchKind::KeywordOnly => {}
                    _ => semantic_hit_count = Some(0),
                }
                let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
                let time_budget = match cutoff {
                    Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
                    None => TimeBudget::default(),
                };
                let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
                    prepare_search(&index, &rtxn, &query, &search_kind, time_budget)?;
                search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
                search.offset(0);
                search.limit(required_hit_count);
                let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
                let format = AttributesFormat {
                    attributes_to_retrieve: query.attributes_to_retrieve,
                    retrieve_vectors,
                    attributes_to_highlight: query.attributes_to_highlight,
                    attributes_to_crop: query.attributes_to_crop,
                    crop_length: query.crop_length,
                    crop_marker: query.crop_marker,
                    highlight_pre_tag: query.highlight_pre_tag,
                    highlight_post_tag: query.highlight_post_tag,
                    show_matches_position: query.show_matches_position,
                    sort: query.sort,
                    show_ranking_score: query.show_ranking_score,
                    show_ranking_score_details: query.show_ranking_score_details,
                };
                let milli::SearchResult {
                    matching_words,
                    candidates: query_candidates,
                    documents_ids,
                    document_scores,
                    degraded: query_degraded,
                    used_negative_operator: query_used_negative_operator,
                } = result;
                candidates |= query_candidates;
                degraded |= query_degraded;
                used_negative_operator |= query_used_negative_operator;
                let tokenizer = HitMaker::tokenizer(
                    &script_lang_map,
                    dictionary.as_deref(),
                    separators.as_deref(),
                );
                let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
                let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;
                results_by_query.push(SearchResultByQuery {
                    federation_options,
                    hit_maker,
                    query_index,
                    documents_ids,
                    document_scores,
                });
                Ok(())
            })();
            if let Err(mut error) = res {
                error.message = format!("Inside `.queries[{query_index}]`: {}", error.message);
                return Err(error);
            }
        }
        // 2.2. merge inside index
        let mut documents_seen = RoaringBitmap::new();
        let merged_result: Result<Vec<_>, ResponseError> =
            merge_index_local_results(results_by_query)
                // skip documents we've already seen & mark that we saw the current document
                .filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid))
                .take(required_hit_count)
                // 2.3 make hits
                .map(
                    |SearchResultByQueryIterItem {
                         docid,
                         score,
                         federation_options,
                         hit_maker,
                         query_index,
                     }| {
                        let mut hit = hit_maker.make_hit(docid, &score)?;
                        let weighted_score =
                            ScoreDetails::global_score(score.iter()) * (*federation_options.weight);
                        let _federation = serde_json::json!(
                            {
                                "indexUid": index_uid,
                                "queriesPosition": query_index,
                                "weightedRankingScore": weighted_score,
                            }
                        );
                        hit.document.insert("_federation".to_string(), _federation);
                        Ok(SearchHitByIndex { hit, score, federation_options, query_index })
                    },
                )
                .collect();
        let merged_result = merged_result?;
        results_by_index.push(SearchResultByIndex {
            hits: merged_result,
            candidates,
            degraded,
            used_negative_operator,
        });
    }
    // 3. merge hits and metadata across indexes
    // 3.1 merge metadata
    let (estimated_total_hits, degraded, used_negative_operator) = {
        let mut estimated_total_hits = 0;
        let mut degraded = false;
        let mut used_negative_operator = false;
        for SearchResultByIndex {
            hits: _,
            candidates,
            degraded: degraded_by_index,
            used_negative_operator: used_negative_operator_by_index,
        } in &results_by_index
        {
            estimated_total_hits += candidates.len() as usize;
            degraded |= *degraded_by_index;
            used_negative_operator |= *used_negative_operator_by_index;
        }
        (estimated_total_hits, degraded, used_negative_operator)
    };
    // 3.2 merge hits
    let merged_hits: Vec<_> = merge_index_global_results(results_by_index)
        .skip(federation.offset)
        .take(federation.limit)
        .inspect(|hit| {
            if let Some(semantic_hit_count) = &mut semantic_hit_count {
                if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) {
                    *semantic_hit_count += 1;
                }
            }
        })
        .map(|hit| hit.hit)
        .collect();
    let search_result = FederatedSearchResult {
        hits: merged_hits,
        processing_time_ms: before_search.elapsed().as_millis(),
        hits_info: HitsInfo::OffsetLimit {
            limit: federation.limit,
            offset: federation.offset,
            estimated_total_hits,
        },
        semantic_hit_count,
        degraded,
        used_negative_operator,
    };
    Ok(search_result)
 }
--- a/meilisearch/src/search/mod.rs
+++ b/meilisearch/src/search/mod.rs
@ -1,6 +1,6 @@
 use core::fmt;
 use std::cmp::min;
-use std::collections::{BTreeMap, BTreeSet, HashSet};
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
 use std::str::FromStr;
 use std::sync::Arc;
 use std::time::{Duration, Instant};
@ -31,6 +31,11 @@ use serde_json::{json, Value};
 use crate::error::MeilisearchHttpError;
 mod federated;
 pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions};
 mod ranking_rules;
 type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
 pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
@ -257,11 +262,13 @@ pub struct HybridQuery {
    pub embedder: Option<String>,
 }
 /// The kind of search to run for a query.
 ///
 /// `search_from_kind` dispatches on this value: `KeywordOnly` and `SemanticOnly` both call
 /// `execute` on the prepared search, while `Hybrid` calls `execute_hybrid` with its
 /// `semantic_ratio`.
 #[derive(Clone)]
 pub enum SearchKind {
    /// Keyword search; `search_from_kind` reports no semantic hit count for it.
    KeywordOnly,
    /// Semantic search; `search_from_kind` reports every returned hit as a semantic hit.
    SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
    /// Hybrid search; `semantic_ratio` is forwarded to `execute_hybrid`.
    Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
 }
 impl SearchKind {
    pub(crate) fn semantic(
        index_scheduler: &index_scheduler::IndexScheduler,
@ -358,7 +365,7 @@ impl SearchQuery {
    }
 }
-/// A `SearchQuery` + an index UID.
+/// A `SearchQuery` + an index UID and optional FederationOptions.
 // This struct contains the fields of `SearchQuery` inline.
 // This is because neither deserr nor serde supports `flatten` when using `deny_unknown_fields`.
 // The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
@ -373,10 +380,10 @@ pub struct SearchQueryWithIndex {
    pub vector: Option<Vec<f32>>,
    #[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
    pub hybrid: Option<HybridQuery>,
-    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
+    #[deserr(default, error = DeserrJsonError<InvalidSearchOffset>)]
-    pub offset: usize,
+    pub offset: Option<usize>,
-    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
+    #[deserr(default, error = DeserrJsonError<InvalidSearchLimit>)]
-    pub limit: usize,
+    pub limit: Option<usize>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
    pub page: Option<usize>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
@ -417,12 +424,33 @@ pub struct SearchQueryWithIndex {
    pub attributes_to_search_on: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
    #[deserr(default)]
    pub federation_options: Option<FederationOptions>,
 }
 impl SearchQueryWithIndex {
-    pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
+    pub fn has_federation_options(&self) -> bool {
        self.federation_options.is_some()
    }
    /// Returns the name of the first pagination parameter set on this query, if any.
    ///
    /// Parameters are checked in this order: `offset`, `limit`, `page`, `hitsPerPage`.
    /// Returns `None` when none of them is set.
    pub fn has_pagination(&self) -> Option<&'static str> {
        // order matters: the first parameter that is set wins
        let pagination_params = [
            (self.offset.is_some(), "offset"),
            (self.limit.is_some(), "limit"),
            (self.page.is_some(), "page"),
            (self.hits_per_page.is_some(), "hitsPerPage"),
        ];
        pagination_params.iter().find(|(is_set, _)| *is_set).map(|&(_, name)| name)
    }
    pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
        let SearchQueryWithIndex {
            index_uid,
            federation_options,
            q,
            vector,
            offset,
@ -454,8 +482,8 @@ impl SearchQueryWithIndex {
            SearchQuery {
                q,
                vector,
-                offset,
+                offset: offset.unwrap_or(DEFAULT_SEARCH_OFFSET()),
-                limit,
+                limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()),
                page,
                hits_per_page,
                attributes_to_retrieve,
@ -480,6 +508,7 @@ impl SearchQueryWithIndex {
                // do not use ..Default::default() here,
                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
            },
            federation_options,
        )
    }
 }
@ -864,15 +893,7 @@ pub fn perform_search(
            used_negative_operator,
        },
        semantic_hit_count,
-    ) = match &search_kind {
+    ) = search_from_kind(search_kind, search)?;
        SearchKind::KeywordOnly => (search.execute()?, None),
        SearchKind::SemanticOnly { .. } => {
            let results = search.execute()?;
            let semantic_hit_count = results.document_scores.len() as u32;
            (results, Some(semantic_hit_count))
        }
        SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
    };
    let SearchQuery {
        q,
@ -919,8 +940,13 @@ pub fn perform_search(
        show_ranking_score_details,
    };
-    let documents =
+    let documents = make_hits(
-        make_hits(index, &rtxn, format, matching_words, documents_ids, document_scores)?;
+        index,
        &rtxn,
        format,
        matching_words,
        documents_ids.iter().copied().zip(document_scores.iter()),
    )?;
    let number_of_hits = min(candidates.len() as usize, max_total_hits);
    let hits_info = if is_finite_pagination {
@ -988,6 +1014,22 @@ pub fn perform_search(
    Ok(result)
 }
 /// Executes `search` in the way dictated by `search_kind`.
 ///
 /// Returns the milli search result together with the semantic hit count:
 /// - `KeywordOnly`: `None`,
 /// - `SemanticOnly`: the number of entries in the result's `document_scores`,
 /// - `Hybrid`: whatever `execute_hybrid` returns for the given `semantic_ratio`.
 ///
 /// # Errors
 ///
 /// Propagates any error raised by the underlying search execution.
 pub fn search_from_kind(
    search_kind: SearchKind,
    search: milli::Search<'_>,
 ) -> Result<(milli::SearchResult, Option<u32>), MeilisearchHttpError> {
    match &search_kind {
        SearchKind::KeywordOnly => {
            let keyword_results = search.execute()?;
            Ok((keyword_results, None))
        }
        SearchKind::SemanticOnly { .. } => {
            let semantic_results = search.execute()?;
            let hit_count = semantic_results.document_scores.len() as u32;
            Ok((semantic_results, Some(hit_count)))
        }
        SearchKind::Hybrid { semantic_ratio, .. } => {
            let hybrid_outcome = search.execute_hybrid(*semantic_ratio)?;
            Ok(hybrid_outcome)
        }
    }
 }
 struct AttributesFormat {
    attributes_to_retrieve: Option<BTreeSet<String>>,
    retrieve_vectors: RetrieveVectors,
@ -1033,19 +1075,72 @@ impl RetrieveVectors {
    }
 }
-fn make_hits(
+struct HitMaker<'a> {
-    index: &Index,
+    index: &'a Index,
-    rtxn: &RoTxn<'_>,
+    rtxn: &'a RoTxn<'a>,
-    format: AttributesFormat,
+    fields_ids_map: FieldsIdsMap,
-    matching_words: milli::MatchingWords,
+    displayed_ids: BTreeSet<FieldId>,
-    documents_ids: Vec<u32>,
+    vectors_fid: Option<FieldId>,
-    document_scores: Vec<Vec<ScoreDetails>>,
+    retrieve_vectors: RetrieveVectors,
-) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
+    to_retrieve_ids: BTreeSet<FieldId>,
-    let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
+    embedding_configs: Vec<milli::index::IndexEmbeddingConfig>,
-    let displayed_ids =
+    formatter_builder: MatcherBuilder<'a>,
-        index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::<BTreeSet<_>>());
+    formatted_options: BTreeMap<FieldId, FormatOptions>,
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    sort: Option<Vec<String>>,
    show_matches_position: bool,
 }
-    let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
+impl<'a> HitMaker<'a> {
    /// Builds the tokenizer used to format hits.
    ///
    /// The char map is always enabled. `script_lang_map` is installed as the allow list only
    /// when it is non-empty; `separators` and `dictionary` are applied only when provided.
    pub fn tokenizer<'b>(
        script_lang_map: &'b HashMap<milli::tokenizer::Script, Vec<milli::tokenizer::Language>>,
        dictionary: Option<&'b [&'b str]>,
        separators: Option<&'b [&'b str]>,
    ) -> milli::tokenizer::Tokenizer<'b> {
        let mut builder = TokenizerBuilder::default();
        builder.create_char_map(true);

        if !script_lang_map.is_empty() {
            builder.allow_list(script_lang_map);
        }
        if let Some(custom_separators) = separators {
            builder.separators(custom_separators);
        }
        if let Some(custom_dictionary) = dictionary {
            builder.words_dict(custom_dictionary);
        }

        builder.into_tokenizer()
    }
    /// Wraps `matching_words` and `tokenizer` into a `MatcherBuilder`.
    ///
    /// The returned builder is what `HitMaker::new` expects as its `formatter_builder`
    /// argument.
    pub fn formatter_builder(
        matching_words: milli::MatchingWords,
        tokenizer: milli::tokenizer::Tokenizer<'_>,
    ) -> MatcherBuilder<'_> {
        MatcherBuilder::new(matching_words, tokenizer)
    }
    pub fn new(
        index: &'a Index,
        rtxn: &'a RoTxn<'a>,
        format: AttributesFormat,
        mut formatter_builder: MatcherBuilder<'a>,
    ) -> Result<Self, MeilisearchHttpError> {
        formatter_builder.crop_marker(format.crop_marker);
        formatter_builder.highlight_prefix(format.highlight_pre_tag);
        formatter_builder.highlight_suffix(format.highlight_post_tag);
        let fields_ids_map = index.fields_ids_map(rtxn)?;
        let displayed_ids = index
            .displayed_fields_ids(rtxn)?
            .map(|fields| fields.into_iter().collect::<BTreeSet<_>>());
        let vectors_fid =
            fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
        let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
            // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
@ -1056,6 +1151,9 @@ fn make_hits(
            (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
        };
        let displayed_ids =
            displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
        let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
            if vectors_is_hidden {
                RetrieveVectors::Hide
@ -1066,8 +1164,6 @@ fn make_hits(
            format.retrieve_vectors
        };
    let displayed_ids =
        displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
        let fids = |attrs: &BTreeSet<String>| {
            let mut ids = BTreeSet::new();
            for attr in attrs {
@ -1101,63 +1197,69 @@ fn make_hits(
            &fields_ids_map,
            &displayed_ids,
        );
-    let mut tokenizer_builder = TokenizerBuilder::default();
+
    tokenizer_builder.create_char_map(true);
    let script_lang_map = index.script_language(rtxn)?;
    if !script_lang_map.is_empty() {
        tokenizer_builder.allow_list(&script_lang_map);
    }
    let separators = index.allowed_separators(rtxn)?;
    let separators: Option<Vec<_>> =
        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref separators) = separators {
        tokenizer_builder.separators(separators);
    }
    let dictionary = index.dictionary(rtxn)?;
    let dictionary: Option<Vec<_>> =
        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref dictionary) = dictionary {
        tokenizer_builder.words_dict(dictionary);
    }
    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
    formatter_builder.crop_marker(format.crop_marker);
    formatter_builder.highlight_prefix(format.highlight_pre_tag);
    formatter_builder.highlight_suffix(format.highlight_post_tag);
    let mut documents = Vec::new();
        let embedding_configs = index.embedding_configs(rtxn)?;
-    let documents_iter = index.documents(rtxn, documents_ids)?;
+
-    for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
+        Ok(Self {
            index,
            rtxn,
            fields_ids_map,
            displayed_ids,
            vectors_fid,
            retrieve_vectors,
            to_retrieve_ids,
            embedding_configs,
            formatter_builder,
            formatted_options,
            show_ranking_score: format.show_ranking_score,
            show_ranking_score_details: format.show_ranking_score_details,
            show_matches_position: format.show_matches_position,
            sort: format.sort,
        })
    }
    pub fn make_hit(
        &self,
        id: u32,
        score: &[ScoreDetails],
    ) -> Result<SearchHit, MeilisearchHttpError> {
        let (_, obkv) =
            self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
        // First generate a document with all the displayed fields
-        let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
+        let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?;
        let add_vectors_fid =
-            vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve);
+            self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);
        // select the attributes to retrieve
-        let attributes_to_retrieve = to_retrieve_ids
+        let attributes_to_retrieve = self
            .to_retrieve_ids
            .iter()
            // skip the vectors_fid if RetrieveVectors::Hide
-            .filter(|fid| match vectors_fid {
+            .filter(|fid| match self.vectors_fid {
                Some(vectors_fid) => {
-                    !(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
+                    !(self.retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
                }
                None => true,
            })
            // need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
            .chain(add_vectors_fid.iter())
-            .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
+            .map(|&fid| self.fields_ids_map.name(fid).expect("Missing field name"));
        let mut document =
            permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
-        if retrieve_vectors == RetrieveVectors::Retrieve {
+        if self.retrieve_vectors == RetrieveVectors::Retrieve {
            // Clippy is wrong
            #[allow(clippy::manual_unwrap_or_default)]
            let mut vectors = match document.remove("_vectors") {
                Some(Value::Object(map)) => map,
                _ => Default::default(),
            };
-            for (name, vector) in index.embeddings(rtxn, id)? {
+            for (name, vector) in self.index.embeddings(self.rtxn, id)? {
-                let user_provided = embedding_configs
+                let user_provided = self
                    .embedding_configs
                    .iter()
                    .find(|conf| conf.name == name)
                    .is_some_and(|conf| conf.user_provided.contains(id));
@ -1170,21 +1272,21 @@ fn make_hits(
        let (matches_position, formatted) = format_fields(
            &displayed_document,
-            &fields_ids_map,
+            &self.fields_ids_map,
-            &formatter_builder,
+            &self.formatter_builder,
-            &formatted_options,
+            &self.formatted_options,
-            format.show_matches_position,
+            self.show_matches_position,
-            &displayed_ids,
+            &self.displayed_ids,
        )?;
-        if let Some(sort) = format.sort.as_ref() {
+        if let Some(sort) = self.sort.as_ref() {
            insert_geo_distance(sort, &mut document);
        }
        let ranking_score =
-            format.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
+            self.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
        let ranking_score_details =
-            format.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
+            self.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
        let hit = SearchHit {
            document,
@ -1193,7 +1295,38 @@ fn make_hits(
            ranking_score_details,
            ranking_score,
        };
-        documents.push(hit);
+
        Ok(hit)
    }
 }
 /// Builds one `SearchHit` per `(document id, score details)` pair, in iteration order.
 ///
 /// The tokenizer inputs (script/language map, dictionary, separators) are read once from
 /// `index`, then a single `HitMaker` is reused for every document.
 ///
 /// # Errors
 ///
 /// Returns the first error raised while reading the index settings, creating the
 /// `HitMaker`, or making a hit; hits after the failing one are not computed.
 fn make_hits<'a>(
    index: &Index,
    rtxn: &RoTxn<'_>,
    format: AttributesFormat,
    matching_words: milli::MatchingWords,
    documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
 ) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
    let script_lang_map = index.script_language(rtxn)?;

    let raw_dictionary = index.dictionary(rtxn)?;
    let dictionary_words: Option<Vec<_>> =
        raw_dictionary.as_ref().map(|words| words.iter().map(String::as_str).collect());

    let raw_separators = index.allowed_separators(rtxn)?;
    let separator_strs: Option<Vec<_>> =
        raw_separators.as_ref().map(|seps| seps.iter().map(String::as_str).collect());

    let tokenizer = HitMaker::tokenizer(
        &script_lang_map,
        dictionary_words.as_deref(),
        separator_strs.as_deref(),
    );
    let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
    let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?;

    // collecting into a `Result` stops at the first failing hit, like the `?` in a push loop
    let hits: Result<Vec<_>, _> =
        documents_ids_scores.map(|(docid, score)| hit_maker.make_hit(docid, score)).collect();
    hits
 }
@ -1309,7 +1442,13 @@ pub fn perform_similar(
        show_ranking_score_details,
    };
-    let hits = make_hits(index, &rtxn, format, Default::default(), documents_ids, document_scores)?;
+    let hits = make_hits(
        index,
        &rtxn,
        format,
        Default::default(),
        documents_ids.iter().copied().zip(document_scores.iter()),
    )?;
    let max_total_hits = index
        .pagination_max_total_hits(&rtxn)
@ -1482,10 +1621,10 @@ fn make_document(
    Ok(document)
 }
-fn format_fields<'a>(
+fn format_fields(
    document: &Document,
    field_ids_map: &FieldsIdsMap,
-    builder: &'a MatcherBuilder<'a>,
+    builder: &MatcherBuilder<'_>,
    formatted_options: &BTreeMap<FieldId, FormatOptions>,
    compute_matches: bool,
    displayable_ids: &BTreeSet<FieldId>,
@ -1540,9 +1679,9 @@ fn format_fields<'a>(
    Ok((matches_position, document))
 }
-fn format_value<'a>(
+fn format_value(
    value: Value,
-    builder: &'a MatcherBuilder<'a>,
+    builder: &MatcherBuilder<'_>,
    format_options: Option<FormatOptions>,
    infos: &mut Vec<MatchBounds>,
    compute_matches: bool,
--- a/meilisearch/src/search/ranking_rules.rs
+++ b/meilisearch/src/search/ranking_rules.rs
@ -0,0 +1,823 @@
 use std::collections::HashMap;
 use std::fmt::Write;
 use itertools::Itertools as _;
 use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy};
 /// Ranking rules of a single query, kept both as provided and in canonical form.
 ///
 /// The canonical form is computed once, in [`RankingRules::new`]. The original criteria and
 /// sort are retained because the canonicalization notes name rules as they were provided.
 pub struct RankingRules {
    /// Criteria that remain after canonicalization.
    canonical_criteria: Vec<Criterion>,
    /// Sort entries that remain after canonicalization; `None` unless a `sort` criterion was kept.
    canonical_sort: Option<Vec<AscDesc>>,
    /// Every modification made while canonicalizing, in the order it was made.
    canonicalization_actions: Vec<CanonicalizationAction>,
    /// Criteria exactly as passed to [`RankingRules::new`].
    source_criteria: Vec<Criterion>,
    /// Sort exactly as passed to [`RankingRules::new`].
    source_sort: Option<Vec<AscDesc>>,
 }
 /// A single modification applied to a query's ranking rules during canonicalization.
 ///
 /// Each variant stores the position(s) of the rules involved so that the modification can be
 /// described afterwards.
 pub enum CanonicalizationAction {
    /// `words` was inserted before the relevancy rule located at `prepended_index`.
    PrependedWords {
        prepended_index: RankingRuleSource,
    },
    /// The rule at `removed_occurrence` was dropped because `earlier_occurrence` already covers it.
    RemovedDuplicate {
        earlier_occurrence: RankingRuleSource,
        removed_occurrence: RankingRuleSource,
    },
    /// A `words` rule was dropped for the given `reason`.
    RemovedWords {
        reason: RemoveWords,
        removed_occurrence: RankingRuleSource,
    },
    /// A relevancy rule was dropped because the query is a placeholder search.
    RemovedPlaceholder {
        removed_occurrence: RankingRuleSource,
    },
    /// In a vector search, the rule at `truncated_from` and all later rules were dropped;
    /// `vector_rule` is the position standing in for the vector rule.
    TruncatedVector {
        vector_rule: RankingRuleSource,
        truncated_from: RankingRuleSource,
    },
    /// In a vector search, a further relevancy rule was dropped because `vector_rule` already
    /// stands in for the vector rule.
    RemovedVector {
        vector_rule: RankingRuleSource,
        removed_occurrence: RankingRuleSource,
    },
    /// A `sort` ranking rule was dropped because the query provides no sort.
    RemovedSort {
        removed_occurrence: RankingRuleSource,
    },
 }
 /// Why a `words` ranking rule was dropped during canonicalization.
 pub enum RemoveWords {
    /// `words` had already been inserted automatically ahead of an earlier relevancy rule.
    WasPrepended,
    /// The query uses the `all` terms matching strategy.
    MatchingStrategyAll,
 }
 /// Renders the reason as a sentence fragment meant to follow the word "because".
 impl std::fmt::Display for RemoveWords {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let reason = match self {
            RemoveWords::WasPrepended => "it was previously prepended",
            // The search parameter is `matchingStrategy`; no `matchingWords` parameter exists.
            RemoveWords::MatchingStrategyAll => "`query.matchingStrategy` is set to `all`",
        };
        f.write_str(reason)
    }
 }
 /// Selects which canonicalization procedure [`RankingRules::new`] applies.
 pub enum CanonicalizationKind {
    /// Placeholder search: relevancy rules are removed.
    Placeholder,
    /// Keyword search: `words` is prepended where needed and duplicates are removed.
    Keyword,
    /// Vector search: the first relevancy rule stands in for the vector rule.
    Vector,
 }
 /// The first pair of ranking rules whose kinds differ between two queries.
 pub struct CompatibilityError {
    /// Offending rule taken from the query that was compared against.
    previous: RankingRule,
    /// Offending rule taken from the query being checked.
    current: RankingRule,
 }
 impl CompatibilityError {
    /// Renders this incompatibility as a user-facing [`ResponseError`].
    ///
    /// The message names both offending rules, states why they cannot be compared, and, for
    /// each query whose ranking rules were altered by canonicalization, appends the list of
    /// alterations. The error code is always `InvalidMultiSearchQueryRankingRules`.
    pub(crate) fn to_response_error(
        &self,
        ranking_rules: &RankingRules,
        previous_ranking_rules: &RankingRules,
        query_index: usize,
        previous_query_index: usize,
        index_uid: &str,
        previous_index_uid: &str,
    ) -> meilisearch_types::error::ResponseError {
        let reason = self.reason();
        let previous_rule = self.previous.as_string(
            &previous_ranking_rules.canonical_criteria,
            &previous_ranking_rules.canonical_sort,
            previous_query_index,
            previous_index_uid,
        );
        let rule = self.current.as_string(
            &ranking_rules.canonical_criteria,
            &ranking_rules.canonical_sort,
            query_index,
            index_uid,
        );

        // Formatting into a `String` cannot fail, hence the discarded results below.
        let mut msg = String::new();
        let _ = writeln!(
            &mut msg,
            "The results of queries #{previous_query_index} and #{query_index} are incompatible: "
        );
        let _ = writeln!(&mut msg, "  1. {previous_rule}");
        let _ = writeln!(&mut msg, "  2. {rule}");
        let _ = writeln!(&mut msg, "  - {reason}");

        // Notes for the earlier query come first, mirroring the order of the numbered rules.
        let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes();
        if !previous_canonicalization_actions.is_empty() {
            let _ = write!(&mut msg, "  - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}");
        }

        let canonicalization_actions = ranking_rules.canonicalization_notes();
        if !canonicalization_actions.is_empty() {
            let _ = write!(&mut msg, "  - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}");
        }

        ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules)
    }
    pub fn reason(&self) -> &'static str {
        match (self.previous.kind, self.current.kind) {
            (RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort)
            | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort)
            | (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy)
            | (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => {
                "cannot compare a relevancy rule with a sort rule"
            }
            (RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort)
            | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort)
            | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy)
            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => {
                "cannot compare a relevancy rule with a geosort rule"
            }
            (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort)
            | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => {
                "cannot compare two sort rules in opposite directions"
            }
            (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort)
            | (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort)
            | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort)
            | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort)
            | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort)
            | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort)
            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingSort)
            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => {
                "cannot compare a sort rule with a geosort rule"
            }
            (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort)
            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => {
                "cannot compare two geosort rules in opposite directions"
            }
            (RankingRuleKind::Relevancy, RankingRuleKind::Relevancy)
            | (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort)
            | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort)
            | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort)
            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => {
                "internal error, comparison should be possible"
            }
        }
    }
 }
 impl RankingRules {
    /// Canonicalizes `criteria` and `sort` for the given kind of search.
    ///
    /// The inputs are stored untouched next to the canonical result and the list of
    /// modifications that canonicalization performed.
    pub fn new(
        criteria: Vec<Criterion>,
        sort: Option<Vec<AscDesc>>,
        terms_matching_strategy: TermsMatchingStrategy,
        canonicalization_kind: CanonicalizationKind,
    ) -> Self {
        let canonicalized =
            Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind);
        let (canonical_criteria, canonical_sort, canonicalization_actions) = canonicalized;

        Self {
            source_criteria: criteria,
            source_sort: sort,
            canonical_criteria,
            canonical_sort,
            canonicalization_actions,
        }
    }
    fn canonicalize(
        criteria: &[Criterion],
        sort: &Option<Vec<AscDesc>>,
        terms_matching_strategy: TermsMatchingStrategy,
        canonicalization_kind: CanonicalizationKind,
    ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
        match canonicalization_kind {
            CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort),
            CanonicalizationKind::Keyword => {
                Self::canonicalize_keyword(criteria, sort, terms_matching_strategy)
            }
            CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort),
        }
    }
    /// Canonicalizes ranking rules for a placeholder search.
    ///
    /// Relevancy rules are removed. Only the first `sort` rule is kept, and only when the query
    /// has a sort. An `asc`/`desc` rule is dropped when its field was already seen.
    ///
    /// Returns the canonical criteria, the canonical sort and the actions performed.
    fn canonicalize_placeholder(
        criteria: &[Criterion],
        sort_query: &Option<Vec<AscDesc>>,
    ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
        use std::collections::hash_map::Entry;

        let mut kept_criteria = Vec::new();
        let mut kept_sort = None;
        let mut actions = Vec::new();
        // Field name -> where that field was first sorted on.
        let mut seen_fields = HashMap::new();
        // Position of the `sort` criterion that was kept, if any.
        let mut sort_position: Option<usize> = None;

        for (position, criterion) in criteria.iter().enumerate() {
            let here = RankingRuleSource::Criterion(position);
            match criterion {
                Criterion::Words
                | Criterion::Typo
                | Criterion::Proximity
                | Criterion::Attribute
                | Criterion::Exactness => {
                    actions.push(CanonicalizationAction::RemovedPlaceholder {
                        removed_occurrence: here,
                    });
                }
                Criterion::Sort => match (sort_position, sort_query) {
                    (Some(earlier), _) => {
                        actions.push(CanonicalizationAction::RemovedDuplicate {
                            earlier_occurrence: RankingRuleSource::Criterion(earlier),
                            removed_occurrence: here,
                        });
                    }
                    (None, Some(requested)) => {
                        sort_position = Some(position);
                        kept_criteria.push(criterion.clone());
                        kept_sort = Some(canonicalize_sort(
                            &mut seen_fields,
                            requested.as_slice(),
                            position,
                            &mut actions,
                        ));
                    }
                    (None, None) => {
                        actions.push(CanonicalizationAction::RemovedSort {
                            removed_occurrence: here,
                        });
                    }
                },
                Criterion::Asc(field) | Criterion::Desc(field) => {
                    match seen_fields.entry(field.clone()) {
                        Entry::Occupied(earlier) => {
                            actions.push(CanonicalizationAction::RemovedDuplicate {
                                earlier_occurrence: *earlier.get(),
                                removed_occurrence: here,
                            });
                        }
                        Entry::Vacant(slot) => {
                            slot.insert(here);
                            kept_criteria.push(criterion.clone());
                        }
                    }
                }
            }
        }

        (kept_criteria, kept_sort, actions)
    }
    fn canonicalize_vector(
        criteria: &[Criterion],
        sort_query: &Option<Vec<AscDesc>>,
    ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
        let mut sort = None;
        let mut sorted_fields = HashMap::new();
        let mut canonicalization_actions = Vec::new();
        let mut canonical_criteria = Vec::new();
        let mut canonical_sort = None;
        let mut vector = None;
        'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() {
            match criterion.clone() {
                Criterion::Words
                | Criterion::Typo
                | Criterion::Proximity
                | Criterion::Attribute
                | Criterion::Exactness => match vector {
                    Some(previous_occurrence) => {
                        if sorted_fields.is_empty() {
                            canonicalization_actions.push(CanonicalizationAction::RemovedVector {
                                vector_rule: RankingRuleSource::Criterion(previous_occurrence),
                                removed_occurrence: RankingRuleSource::Criterion(criterion_index),
                            });
                        } else {
                            canonicalization_actions.push(
                                CanonicalizationAction::TruncatedVector {
                                    vector_rule: RankingRuleSource::Criterion(previous_occurrence),
                                    truncated_from: RankingRuleSource::Criterion(criterion_index),
                                },
                            );
                            break 'criteria;
                        }
                    }
                    None => {
                        canonical_criteria.push(criterion.clone());
                        vector = Some(criterion_index);
                    }
                },
                Criterion::Sort => {
                    if let Some(previous_index) = sort {
                        canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
                            earlier_occurrence: RankingRuleSource::Criterion(previous_index),
                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
                        });
                    } else if let Some(sort_query) = sort_query {
                        sort = Some(criterion_index);
                        canonical_criteria.push(criterion.clone());
                        canonical_sort = Some(canonicalize_sort(
                            &mut sorted_fields,
                            sort_query.as_slice(),
                            criterion_index,
                            &mut canonicalization_actions,
                        ));
                    } else {
                        canonicalization_actions.push(CanonicalizationAction::RemovedSort {
                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
                        })
                    }
                }
                Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
                    std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
                        .push(CanonicalizationAction::RemovedDuplicate {
                            earlier_occurrence: *entry.get(),
                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
                        }),
                    std::collections::hash_map::Entry::Vacant(entry) => {
                        entry.insert(RankingRuleSource::Criterion(criterion_index));
                        canonical_criteria.push(criterion.clone())
                    }
                },
            }
        }
        (canonical_criteria, canonical_sort, canonicalization_actions)
    }
    /// Canonicalizes ranking rules for a keyword search.
    ///
    /// - `words` is removed when `terms_matching_strategy` is `All`, when it duplicates an
    ///   earlier `words`, or when it was already prepended automatically.
    /// - `typo`, `proximity`, `attribute` and `exactness` are deduplicated; the first of them
    ///   triggers the prepending of `words` when needed (see `canonicalize_criterion`).
    /// - Only the first `sort` is kept, and only when the query has a sort.
    /// - An `asc`/`desc` rule is dropped when its field was already seen.
    ///
    /// Returns the canonical criteria, the canonical sort and the actions performed.
    fn canonicalize_keyword(
        criteria: &[Criterion],
        sort_query: &Option<Vec<AscDesc>>,
        terms_matching_strategy: TermsMatchingStrategy,
    ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
        // `None`: no `words` yet; `Some(None)`: prepended automatically;
        // `Some(Some(i))`: explicitly present at position `i`.
        let mut words = None;
        let mut typo = None;
        let mut proximity = None;
        let mut sort = None;
        let mut attribute = None;
        let mut exactness = None;
        let mut sorted_fields = HashMap::new();

        let mut canonical_criteria = Vec::new();
        let mut canonical_sort = None;
        let mut canonicalization_actions = Vec::new();

        for (criterion_index, criterion) in criteria.iter().enumerate() {
            match criterion {
                Criterion::Words => {
                    let removed_occurrence = RankingRuleSource::Criterion(criterion_index);
                    if let TermsMatchingStrategy::All = terms_matching_strategy {
                        canonicalization_actions.push(CanonicalizationAction::RemovedWords {
                            reason: RemoveWords::MatchingStrategyAll,
                            removed_occurrence,
                        });
                        continue;
                    }
                    match words {
                        Some(Some(previous_index)) => {
                            canonicalization_actions.push(
                                CanonicalizationAction::RemovedDuplicate {
                                    earlier_occurrence: RankingRuleSource::Criterion(
                                        previous_index,
                                    ),
                                    removed_occurrence,
                                },
                            );
                        }
                        Some(None) => {
                            // `words` is already in the canonical criteria because it was
                            // prepended: record the removal and really skip this occurrence,
                            // otherwise `words` would end up in the canonical list twice.
                            canonicalization_actions.push(CanonicalizationAction::RemovedWords {
                                reason: RemoveWords::WasPrepended,
                                removed_occurrence,
                            });
                        }
                        None => {
                            words = Some(Some(criterion_index));
                            canonical_criteria.push(criterion.clone());
                        }
                    }
                }
                Criterion::Typo => {
                    canonicalize_criterion(
                        criterion.clone(),
                        criterion_index,
                        terms_matching_strategy,
                        &mut words,
                        &mut canonicalization_actions,
                        &mut canonical_criteria,
                        &mut typo,
                    );
                }
                Criterion::Proximity => {
                    canonicalize_criterion(
                        criterion.clone(),
                        criterion_index,
                        terms_matching_strategy,
                        &mut words,
                        &mut canonicalization_actions,
                        &mut canonical_criteria,
                        &mut proximity,
                    );
                }
                Criterion::Attribute => {
                    canonicalize_criterion(
                        criterion.clone(),
                        criterion_index,
                        terms_matching_strategy,
                        &mut words,
                        &mut canonicalization_actions,
                        &mut canonical_criteria,
                        &mut attribute,
                    );
                }
                Criterion::Exactness => {
                    canonicalize_criterion(
                        criterion.clone(),
                        criterion_index,
                        terms_matching_strategy,
                        &mut words,
                        &mut canonicalization_actions,
                        &mut canonical_criteria,
                        &mut exactness,
                    );
                }
                Criterion::Sort => {
                    if let Some(previous_index) = sort {
                        canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
                            earlier_occurrence: RankingRuleSource::Criterion(previous_index),
                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
                        });
                    } else if let Some(sort_query) = sort_query {
                        sort = Some(criterion_index);
                        canonical_criteria.push(criterion.clone());
                        canonical_sort = Some(canonicalize_sort(
                            &mut sorted_fields,
                            sort_query.as_slice(),
                            criterion_index,
                            &mut canonicalization_actions,
                        ));
                    } else {
                        canonicalization_actions.push(CanonicalizationAction::RemovedSort {
                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
                        })
                    }
                }
                Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s.clone()) {
                    std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
                        .push(CanonicalizationAction::RemovedDuplicate {
                            earlier_occurrence: *entry.get(),
                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
                        }),
                    std::collections::hash_map::Entry::Vacant(entry) => {
                        entry.insert(RankingRuleSource::Criterion(criterion_index));
                        canonical_criteria.push(criterion.clone())
                    }
                },
            }
        }

        (canonical_criteria, canonical_sort, canonicalization_actions)
    }
    /// Checks that the results of `self` can be merged with those of `previous`.
    ///
    /// The coalesced rules of both queries are walked in lockstep, stopping at the end of the
    /// shorter list; the first pair whose kinds differ is returned as the error.
    pub fn is_compatible_with(&self, previous: &Self) -> Result<(), CompatibilityError> {
        let first_mismatch = self
            .coalesce_iterator()
            .zip(previous.coalesce_iterator())
            .find(|(current, previous)| current.kind != previous.kind);

        match first_mismatch {
            Some((current, previous)) => Err(CompatibilityError { current, previous }),
            None => Ok(()),
        }
    }
    /// Number of ranking rules left once consecutive relevancy rules are coalesced into one.
    pub fn constraint_count(&self) -> usize {
        self.coalesce_iterator().count()
    }
    /// Iterates over the canonical rules, merging runs of consecutive relevancy rules.
    ///
    /// A `sort` criterion is first expanded into one rule per canonical sort entry. Adjacent
    /// relevancy rules are then folded into a single rule whose source spans from the first to
    /// the last criterion of the run. Rules of any other kind are yielded unchanged.
    fn coalesce_iterator(&self) -> impl Iterator<Item = RankingRule> + '_ {
        let expanded =
            self.canonical_criteria.iter().enumerate().flat_map(|(criterion_index, criterion)| {
                RankingRule::from_criterion(criterion_index, criterion, &self.canonical_sort)
            });

        expanded.coalesce(|earlier, later| {
            let both_relevancy = earlier.kind == RankingRuleKind::Relevancy
                && later.kind == RankingRuleKind::Relevancy;
            if !both_relevancy {
                // Not mergeable: hand both rules back untouched.
                return Err((earlier, later));
            }

            let source = match (earlier.source, later.source) {
                (RankingRuleSource::Criterion(first), RankingRuleSource::Criterion(last)) => {
                    RankingRuleSource::CoalescedCriteria(first, last)
                }
                (
                    RankingRuleSource::CoalescedCriteria(first, _),
                    RankingRuleSource::Criterion(last),
                ) => RankingRuleSource::CoalescedCriteria(first, last),
                (_, other) => other,
            };
            Ok(RankingRule { source, kind: later.kind })
        })
    }
    /// Renders the canonicalization actions as a numbered list, one action per line.
    ///
    /// Numbering starts at 1 and every line is indented by four spaces. Rule names are looked
    /// up in the source criteria and sort, since the recorded positions refer to the rules as
    /// they were provided. Returns an empty string when canonicalization changed nothing.
    fn canonicalization_notes(&self) -> String {
        use CanonicalizationAction::*;
        let mut notes = String::new();
        for (index, action) in self.canonicalization_actions.iter().enumerate() {
            let index = index + 1;
            // Formatting into a `String` cannot fail, hence the discarded result.
            let _ = match action {
                PrependedWords { prepended_index } => writeln!(
                    &mut notes,
                    "    {index}. Prepended rule `words` before first relevancy rule `{}` at position {}",
                    prepended_index.rule_name(&self.source_criteria, &self.source_sort),
                    prepended_index.rule_position()
                ),
                RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!(
                    &mut notes,
                    "    {index}. Removed duplicate rule `{}` at position {} as it already appears at position {}",
                    earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort),
                    removed_occurrence.rule_position(),
                    earlier_occurrence.rule_position(),
                ),
                RemovedWords { reason, removed_occurrence } => writeln!(
                    &mut notes,
                    "    {index}. Removed rule `words` at position {} because {reason}",
                    removed_occurrence.rule_position()
                ),
                RemovedPlaceholder { removed_occurrence } => writeln!(
                    &mut notes,
                    "    {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")",
                    removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
                    removed_occurrence.rule_position()
                ),
                TruncatedVector { vector_rule, truncated_from } => writeln!(
                    &mut notes,
                    "    {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}",
                    truncated_from.rule_name(&self.source_criteria, &self.source_sort),
                    truncated_from.rule_position(),
                    vector_rule.rule_position(),
                ),
                RemovedVector { vector_rule, removed_occurrence } => writeln!(
                    &mut notes,
                    "    {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}",
                    removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
                    removed_occurrence.rule_position(),
                    vector_rule.rule_position(),
                ),
                // Four-space indent like every other note, so the list stays aligned.
                RemovedSort { removed_occurrence } => writeln!(
                    &mut notes,
                    "    {index}. Removed rule `sort` at position {} because `query.sort` is empty",
                    removed_occurrence.rule_position()
                ),
            };
        }
        notes
    }
 }
 /// Deduplicates the entries of a query's `sort`.
 ///
 /// A field entry is dropped when its field is already in `sorted_fields`; otherwise the field
 /// is registered there. Only the first geo entry is kept. Every dropped entry is recorded as a
 /// `RemovedDuplicate` action. `criterion_index` is the position of the `sort` ranking rule
 /// and is only used to build the recorded sources.
 ///
 /// Returns the entries that were kept, in their original order.
 fn canonicalize_sort(
    sorted_fields: &mut HashMap<String, RankingRuleSource>,
    sort_query: &[AscDesc],
    criterion_index: usize,
    canonicalization_actions: &mut Vec<CanonicalizationAction>,
 ) -> Vec<AscDesc> {
    use std::collections::hash_map::Entry;

    // Index, within `sort_query`, of the geo entry that was kept.
    let mut first_geo: Option<usize> = None;
    let mut kept = Vec::new();

    for (sort_index, asc_desc) in sort_query.iter().enumerate() {
        let here = RankingRuleSource::Sort { criterion_index, sort_index };

        // `Some(..)` when an equivalent entry was already kept, `None` when this one is new.
        let earlier_occurrence = match asc_desc {
            AscDesc::Asc(Member::Field(field)) | AscDesc::Desc(Member::Field(field)) => {
                match sorted_fields.entry(field.clone()) {
                    Entry::Occupied(earlier) => Some(*earlier.get()),
                    Entry::Vacant(slot) => {
                        slot.insert(here);
                        None
                    }
                }
            }
            AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match first_geo {
                Some(earlier_sort_index) => Some(RankingRuleSource::Sort {
                    criterion_index,
                    sort_index: earlier_sort_index,
                }),
                None => {
                    first_geo = Some(sort_index);
                    None
                }
            },
        };

        match earlier_occurrence {
            Some(earlier_occurrence) => {
                canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
                    earlier_occurrence,
                    removed_occurrence: here,
                });
            }
            None => kept.push(asc_desc.clone()),
        }
    }

    kept
 }
 /// Canonicalizes one occurrence of a relevancy criterion other than `words`.
 ///
 /// Unless the matching strategy is `All`, `words` is prepended (and recorded as such) when no
 /// `words` rule has been met yet. The criterion itself is kept on its first occurrence,
 /// tracked through `rule`, and recorded as a removed duplicate on later ones.
 fn canonicalize_criterion(
    criterion: Criterion,
    criterion_index: usize,
    terms_matching_strategy: TermsMatchingStrategy,
    words: &mut Option<Option<usize>>,
    canonicalization_actions: &mut Vec<CanonicalizationAction>,
    canonical_criteria: &mut Vec<Criterion>,
    rule: &mut Option<usize>,
 ) {
    let here = RankingRuleSource::Criterion(criterion_index);

    let must_prepend_words =
        words.is_none() && !matches!(terms_matching_strategy, TermsMatchingStrategy::All);
    if must_prepend_words {
        // inject words
        canonicalization_actions
            .push(CanonicalizationAction::PrependedWords { prepended_index: here });
        canonical_criteria.push(Criterion::Words);
        *words = Some(None);
    }

    match *rule {
        Some(previous_index) => {
            canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
                earlier_occurrence: RankingRuleSource::Criterion(previous_index),
                removed_occurrence: here,
            });
        }
        None => {
            *rule = Some(criterion_index);
            canonical_criteria.push(criterion);
        }
    }
 }
 /// Broad category of a ranking rule; two rules are comparable only when their kinds are equal.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 enum RankingRuleKind {
    /// `words`, `typo`, `proximity`, `attribute` or `exactness`.
    Relevancy,
    /// Ascending sort on a field.
    AscendingSort,
    /// Descending sort on a field.
    DescendingSort,
    /// Ascending geo sort.
    AscendingGeoSort,
    /// Descending geo sort.
    DescendingGeoSort,
 }
 /// A ranking rule reduced to its kind, together with where it comes from.
 #[derive(Debug, Clone, Copy)]
 pub struct RankingRule {
    /// Where the rule is located (ranking rules or `sort`).
    source: RankingRuleSource,
    /// Category used for compatibility checks.
    kind: RankingRuleKind,
 }
 /// Location of a ranking rule.
 #[derive(Debug, Clone, Copy)]
 pub enum RankingRuleSource {
    /// Index of a single criterion in the ranking rules.
    Criterion(usize),
    /// Inclusive range (first index, last index) of criteria merged into a single rule.
    CoalescedCriteria(usize, usize),
    /// Entry `sort_index` of the query's `sort`, reached through the `sort` ranking rule
    /// located at `criterion_index`.
    Sort { criterion_index: usize, sort_index: usize },
 }
 impl RankingRuleSource {
    /// Human-readable name of the rule(s) this source points to.
    ///
    /// Criteria are looked up in `criteria` and sort entries in `sort`. A single criterion or
    /// sort entry that cannot be found is named `unknown`; an out-of-bounds coalesced range
    /// yields an empty string.
    fn rule_name(&self, criteria: &[Criterion], sort: &Option<Vec<AscDesc>>) -> String {
        match *self {
            RankingRuleSource::Criterion(criterion_index) => {
                match criteria.get(criterion_index) {
                    Some(criterion) => criterion.to_string(),
                    None => "unknown".into(),
                }
            }
            RankingRuleSource::CoalescedCriteria(begin, end) => criteria
                .get(begin..=end)
                .unwrap_or_default()
                .iter()
                .map(|criterion| criterion.to_string())
                .collect::<Vec<_>>()
                .join(", "),
            RankingRuleSource::Sort { sort_index, .. } => {
                match sort.as_deref().and_then(|entries| entries.get(sort_index)) {
                    None => "unknown".into(),
                    Some(AscDesc::Asc(Member::Field(field_name))) => format!("{field_name}:asc"),
                    Some(AscDesc::Desc(Member::Field(field_name))) => format!("{field_name}:desc"),
                    Some(AscDesc::Asc(Member::Geo(_))) => "_geo(..):asc".to_string(),
                    Some(AscDesc::Desc(Member::Geo(_))) => "_geo(..):desc".to_string(),
                }
            }
        }
    }
    /// Describes where the rule sits, either in the ranking rules or in `query.sort`.
    fn rule_position(&self) -> String {
        match *self {
            RankingRuleSource::Sort { criterion_index, sort_index } => format!(
                "#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)"
            ),
            RankingRuleSource::CoalescedCriteria(begin, end) => {
                format!("#{begin} to #{end} in ranking rules")
            }
            RankingRuleSource::Criterion(criterion_index) => {
                format!("#{criterion_index} in ranking rules")
            }
        }
    }
 }
 impl RankingRule {
    /// Expands one criterion into the ranking rule(s) it stands for.
    ///
    /// Every criterion maps to exactly one rule, except `sort`, which yields one rule per entry
    /// of `sort` (and none when `sort` is `None`). An `asc`/`desc` criterion on `_geo` is
    /// classified as a geo sort.
    fn from_criterion<'a>(
        criterion_index: usize,
        criterion: &'a Criterion,
        sort: &'a Option<Vec<AscDesc>>,
    ) -> impl Iterator<Item = Self> + 'a {
        let kind = match criterion {
            Criterion::Sort => {
                let expanded = sort.iter().flatten().enumerate().map(
                    move |(rule_index, asc_desc)| {
                        Self::from_asc_desc(asc_desc, criterion_index, rule_index)
                    },
                );
                return either::Right(expanded);
            }
            Criterion::Asc(field) => {
                if field == "_geo" {
                    RankingRuleKind::AscendingGeoSort
                } else {
                    RankingRuleKind::AscendingSort
                }
            }
            Criterion::Desc(field) => {
                if field == "_geo" {
                    RankingRuleKind::DescendingGeoSort
                } else {
                    RankingRuleKind::DescendingSort
                }
            }
            Criterion::Words
            | Criterion::Typo
            | Criterion::Proximity
            | Criterion::Attribute
            | Criterion::Exactness => RankingRuleKind::Relevancy,
        };

        let source = RankingRuleSource::Criterion(criterion_index);
        either::Left(std::iter::once(Self { source, kind }))
    }
    /// Builds the rule for one entry of `sort`.
    ///
    /// `sort_index` is the position of the `sort` criterion in the ranking rules, and
    /// `rule_index_in_sort` the position of `asc_desc` within the sort entries.
    fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self {
        let kind = match asc_desc {
            AscDesc::Asc(member) => match member {
                Member::Field(_) => RankingRuleKind::AscendingSort,
                Member::Geo(_) => RankingRuleKind::AscendingGeoSort,
            },
            AscDesc::Desc(member) => match member {
                Member::Field(_) => RankingRuleKind::DescendingSort,
                Member::Geo(_) => RankingRuleKind::DescendingGeoSort,
            },
        };
        let source = RankingRuleSource::Sort {
            criterion_index: sort_index,
            sort_index: rule_index_in_sort,
        };
        Self { source, kind }
    }
    /// Describes this rule for an error message: where it comes from, its kind, its name(s).
    ///
    /// `query_index` and `index_uid` are only used to spell out the location; the rule names
    /// are resolved against `canonical_criteria` and `canonical_sort`.
    fn as_string(
        &self,
        canonical_criteria: &[Criterion],
        canonical_sort: &Option<Vec<AscDesc>>,
        query_index: usize,
        index_uid: &str,
    ) -> String {
        let source = match self.source {
            RankingRuleSource::Sort { criterion_index, sort_index } => format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
            RankingRuleSource::CoalescedCriteria(begin, end) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`"),
            RankingRuleSource::Criterion(criterion_index) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
        };
        let kind = match self.kind {
            RankingRuleKind::Relevancy => "relevancy",
            RankingRuleKind::AscendingSort => "ascending sort",
            RankingRuleKind::AscendingGeoSort => "ascending geo sort",
            RankingRuleKind::DescendingSort => "descending sort",
            RankingRuleKind::DescendingGeoSort => "descending geo sort",
        };
        let rules = self.fetch_from_source(canonical_criteria, canonical_sort);

        format!("{source}: {kind} {rules}")
    }
    /// Names the rule(s) behind this rule's source, prefixed with `rule(s) `.
    ///
    /// Falls back to `default` when the source cannot be resolved against the canonical
    /// criteria or sort, and for geo sort entries, which have no field name.
    fn fetch_from_source(
        &self,
        canonical_criteria: &[Criterion],
        canonical_sort: &Option<Vec<AscDesc>>,
    ) -> String {
        let resolved: Option<String> = match self.source {
            RankingRuleSource::Criterion(index) => {
                canonical_criteria.get(index).map(|criterion| criterion.to_string())
            }
            RankingRuleSource::CoalescedCriteria(begin, end) => canonical_criteria
                .get(begin..=end)
                .filter(|criteria| !criteria.is_empty())
                .map(|criteria| {
                    criteria
                        .iter()
                        .map(|criterion| criterion.to_string())
                        .collect::<Vec<String>>()
                        .join(", ")
                }),
            RankingRuleSource::Sort { sort_index, .. } => {
                match canonical_sort.as_deref().and_then(|entries| entries.get(sort_index)) {
                    Some(AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s))) => {
                        Some(format!("on field `{s}`"))
                    }
                    _ => None,
                }
            }
        };

        let rule_name = resolved.unwrap_or_else(|| "default".into());
        format!("rule(s) {rule_name}")
    }
 }
--- a/meilisearch/tests/auth/tenant_token_multi_search.rs
+++ b/meilisearch/tests/auth/tenant_token_multi_search.rs
@ -310,6 +310,23 @@ macro_rules! compute_authorized_single_search {
                    tenant_token,
                    key_content
                );
                // federated
                let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales", "filter": $filter}]})).await;
                assert_eq!(
                    200, code,
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response, tenant_token, key_content
                );
                assert_eq!(
                    // same count as the search is federated over a single query
                    $expected_count,
                    response["hits"].as_array().unwrap().len(),
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response,
                    tenant_token,
                    key_content
                );
            }
        }
    };
@ -375,6 +392,25 @@ macro_rules! compute_authorized_multiple_search {
                    tenant_token,
                    key_content
                );
                let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [
                    {"indexUid": "sales", "filter": $filter1},
                    {"indexUid": "products", "filter": $filter2},
                ]})).await;
                assert_eq!(
                    code, 200,
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response, tenant_token, key_content
                );
                assert_eq!(
                    response["hits"].as_array().unwrap().len(),
                    // sum of counts as the search is federated across two queries in different indexes
                    $expected_count1 + $expected_count2,
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response,
                    tenant_token,
                    key_content
                );
            }
        }
    };
@ -433,6 +469,24 @@ macro_rules! compute_forbidden_single_search {
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response, tenant_token, key_content
                );
                let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales"}]})).await;
                if failed_query_index.is_none() && !response["message"].is_null() {
                    response["message"] = serde_json::json!(null);
                }
                assert_eq!(
                    response,
                    invalid_response(failed_query_index),
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response,
                    tenant_token,
                    key_content
                );
                assert_eq!(
                    code, 403,
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response, tenant_token, key_content
                );
            }
        }
    };
@ -494,6 +548,27 @@ macro_rules! compute_forbidden_multiple_search {
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response, tenant_token, key_content
                );
                let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [
                    {"indexUid": "sales"},
                    {"indexUid": "products"},
                ]})).await;
                if failed_query_index.is_none() && !response["message"].is_null() {
                    response["message"] = serde_json::json!(null);
                }
                assert_eq!(
                    response,
                    invalid_response(failed_query_index),
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response,
                    tenant_token,
                    key_content
                );
                assert_eq!(
                    code, 403,
                    "{} using tenant_token: {:?} generated with parent_key: {:?}",
                    response, tenant_token, key_content
                );
            }
        }
    };
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@ -132,6 +132,79 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    ])
 });
 /// Lazily-built JSON fixture: an array of five fruit documents.
 ///
 /// Every document has a `name` and a string `id`; the three documents whose name
 /// starts with "Exclusive sale:" additionally carry `"BOOST": true`.
 static FRUITS_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "name": "Exclusive sale: green apple",
            "id": "green-apple-boosted",
            "BOOST": true
        },
        {
            "name": "Pear",
            "id": "pear",
        },
        {
            "name": "Red apple gala",
            "id": "red-apple-gala",
        },
        {
            "name": "Exclusive sale: Red Tomato",
            "id": "red-tomatoes-boosted",
            "BOOST": true
        },
        {
            "name": "Exclusive sale: Red delicious apple",
            "id": "red-delicious-boosted",
            "BOOST": true,
        }
    ])
 });
 /// Lazily-built JSON fixture: an array of four documents (ids `A` to `D`).
 ///
 /// Every document has a textual `description` and a `_vectors` object holding two
 /// named vectors: `animal` (three components) and `sentiment` (two components).
 /// The meaning of each component is given by the inline comments below.
 static VECTOR_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
      {
        "id": "A",
        "description": "the dog barks at the cat",
        "_vectors": {
          // dimensions [canine, feline, young]
          "animal": [0.9, 0.8, 0.05],
          // dimensions [negative/positive, energy]
          "sentiment": [-0.1, 0.55]
        }
      },
      {
        "id": "B",
        "description": "the kitten scratched the beagle",
        "_vectors": {
          // dimensions [canine, feline, young]
          "animal": [0.8, 0.9, 0.5],
          // dimensions [negative/positive, energy]
          "sentiment": [-0.2, 0.65]
        }
      },
      {
        "id": "C",
        "description": "the dog had to stay alone today",
        "_vectors": {
          // dimensions [canine, feline, young]
          "animal": [0.85, 0.02, 0.1],
          // dimensions [negative/positive, energy]
          "sentiment": [-1.0, 0.1]
        }
      },
      {
        "id": "D",
        "description": "the little boy pets the puppy",
        "_vectors": {
          // dimensions [canine, feline, young]
          "animal": [0.8, 0.09, 0.8],
          // dimensions [negative/positive, energy]
          "sentiment": [0.8, 0.3]
        }
      },
    ])
 });
 #[actix_rt::test]
 async fn simple_placeholder_search() {
    let server = Server::new().await;
--- a/meilisearch/tests/search/multi.rs
+++ b/meilisearch/tests/search/multi.rs
--- a/milli/src/score_details.rs
+++ b/milli/src/score_details.rs
@ -425,9 +425,6 @@ pub struct Sort {
 impl PartialOrd for Sort {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        if self.field_name != other.field_name {
            return None;
        }
        if self.ascending != other.ascending {
            return None;
        }
@ -466,9 +463,6 @@ pub struct GeoSort {
 impl PartialOrd for GeoSort {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        if self.target_point != other.target_point {
            return None;
        }
        if self.ascending != other.ascending {
            return None;
        }
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@ -46,7 +46,7 @@ impl<'m> MatcherBuilder<'m> {
        self
    }
-    pub fn build<'t>(&'m self, text: &'t str) -> Matcher<'t, 'm> {
+    pub fn build<'t>(&self, text: &'t str) -> Matcher<'t, 'm, '_> {
        let crop_marker = match &self.crop_marker {
            Some(marker) => marker.as_str(),
            None => DEFAULT_CROP_MARKER,
@ -105,19 +105,19 @@ pub struct MatchBounds {
    pub length: usize,
 }
-/// Structure used to analize a string, compute words that match,
+/// Structure used to analyze a string, compute words that match,
 /// and format the source string, returning a highlighted and cropped sub-string.
-pub struct Matcher<'t, 'm> {
+pub struct Matcher<'t, 'tokenizer, 'b> {
    text: &'t str,
-    matching_words: &'m MatchingWords,
+    matching_words: &'b MatchingWords,
-    tokenizer: &'m Tokenizer<'m>,
+    tokenizer: &'b Tokenizer<'tokenizer>,
-    crop_marker: &'m str,
+    crop_marker: &'b str,
-    highlight_prefix: &'m str,
+    highlight_prefix: &'b str,
-    highlight_suffix: &'m str,
+    highlight_suffix: &'b str,
    matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
 }
-impl<'t> Matcher<'t, '_> {
+impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> {
    /// Iterates over tokens and save any of them that matches the query.
    fn compute_matches(&mut self) -> &mut Self {
        /// some words are counted as matches only if they are close together and in the good order,