From 5c323cecc77ed23410543da0b9d62215bd7b5a23 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:36:25 +0200 Subject: [PATCH] search: introduce federated search --- meilisearch/src/search.rs | 41 +- meilisearch/src/search/federated.rs | 633 ++++++++++++++ .../src/search/federated/ranking_rules.rs | 823 ++++++++++++++++++ 3 files changed, 1489 insertions(+), 8 deletions(-) create mode 100644 meilisearch/src/search/federated.rs create mode 100644 meilisearch/src/search/federated/ranking_rules.rs diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 2a684817a..ea4bbd038 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -31,6 +31,9 @@ use serde_json::{json, Value}; use crate::error::MeilisearchHttpError; +mod federated; +pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions}; + type MatchesPosition = BTreeMap>; pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0; @@ -360,7 +363,7 @@ impl SearchQuery { } } -/// A `SearchQuery` + an index UID. +/// A `SearchQuery` + an index UID and optional FederationOptions. // This struct contains the fields of `SearchQuery` inline. // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields. // The `From` implementation ensures both structs remain up to date. @@ -375,10 +378,10 @@ pub struct SearchQueryWithIndex { pub vector: Option>, #[deserr(default, error = DeserrJsonError)] pub hybrid: Option, - #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] - pub offset: usize, - #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] - pub limit: usize, + #[deserr(default, error = DeserrJsonError)] + pub offset: Option, + #[deserr(default, error = DeserrJsonError)] + pub limit: Option, #[deserr(default, error = DeserrJsonError)] pub page: Option, #[deserr(default, error = DeserrJsonError)] @@ -419,12 +422,33 @@ pub struct SearchQueryWithIndex { pub attributes_to_search_on: Option>, #[deserr(default, error = DeserrJsonError, default)] pub ranking_score_threshold: Option, + + #[deserr(default)] + pub federation_options: Option, } impl SearchQueryWithIndex { - pub fn into_index_query(self) -> (IndexUid, SearchQuery) { + pub fn has_federation_options(&self) -> bool { + self.federation_options.is_some() + } + pub fn has_pagination(&self) -> Option<&'static str> { + if self.offset.is_some() { + Some("offset") + } else if self.limit.is_some() { + Some("limit") + } else if self.page.is_some() { + Some("page") + } else if self.hits_per_page.is_some() { + Some("hitsPerPage") + } else { + None + } + } + + pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option) { let SearchQueryWithIndex { index_uid, + federation_options, q, vector, offset, @@ -456,8 +480,8 @@ impl SearchQueryWithIndex { SearchQuery { q, vector, - offset, - limit, + offset: offset.unwrap_or(DEFAULT_SEARCH_OFFSET()), + limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()), page, hits_per_page, attributes_to_retrieve, @@ -482,6 +506,7 @@ impl SearchQueryWithIndex { // do not use ..Default::default() here, // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` }, + federation_options, ) } } diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs new file mode 100644 index 000000000..ebb1da7b8 --- /dev/null +++ b/meilisearch/src/search/federated.rs @@ -0,0 +1,633 @@ +use std::cmp::Ordering; +use std::collections::BTreeMap; +use std::fmt; +use std::iter::Zip; +use std::rc::Rc; +use std::str::FromStr as _; +use std::time::Duration; +use std::vec::{IntoIter, Vec}; + +use actix_http::StatusCode; +use index_scheduler::{IndexScheduler, RoFeatures}; +use meilisearch_types::deserr::DeserrJsonError; +use meilisearch_types::error::deserr_codes::{ + InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, +}; +use meilisearch_types::error::ResponseError; +use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; +use meilisearch_types::milli::{self, DocumentId, TimeBudget}; +use roaring::RoaringBitmap; +use serde::Serialize; + +use self::ranking_rules::RankingRules; +use super::{ + prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, + SearchQuery, SearchQueryWithIndex, +}; +use crate::error::MeilisearchHttpError; +use crate::routes::indexes::search::search_kind; + +mod ranking_rules; + +pub const DEFAULT_FEDERATED_WEIGHT: fn() -> f64 = || 1.0; + +#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct FederationOptions { + #[deserr(default, error = DeserrJsonError)] + pub weight: Weight, +} + +#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] +#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)] +pub struct Weight(f64); + +impl Default for Weight { + fn default() -> Self { + Weight(DEFAULT_FEDERATED_WEIGHT()) + } +} + +impl std::convert::TryFrom for Weight { + type Error = InvalidMultiSearchWeight; + + fn try_from(f: f64) -> Result { + // the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable + #[allow(clippy::manual_range_contains)] + if f < 0.0 { + Err(InvalidMultiSearchWeight) + } else { + Ok(Weight(f)) + } + } +} + +impl std::ops::Deref for Weight { + type Target = f64; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Debug, deserr::Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct Federation { + #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] + pub limit: usize, + #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] + pub offset: usize, +} + +#[derive(Debug, deserr::Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct FederatedSearch { + pub queries: Vec, + #[deserr(default)] + pub federation: Option, +} +#[derive(Serialize, Clone, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct FederatedSearchResult { + pub hits: Vec, + pub processing_time_ms: u128, + #[serde(flatten)] + pub hits_info: HitsInfo, + + #[serde(skip_serializing_if = "Option::is_none")] + pub semantic_hit_count: Option, + + // These fields are only used for analytics purposes + #[serde(skip)] + pub degraded: bool, + #[serde(skip)] + pub used_negative_operator: bool, +} + +impl fmt::Debug for FederatedSearchResult { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let FederatedSearchResult { + hits, + processing_time_ms, + hits_info, + semantic_hit_count, + degraded, + used_negative_operator, + } = self; + + let mut debug = f.debug_struct("SearchResult"); + // The most important thing when looking at a search result is the time it took to process + debug.field("processing_time_ms", &processing_time_ms); + debug.field("hits", &format!("[{} hits returned]", hits.len())); + debug.field("hits_info", &hits_info); + if *used_negative_operator { + debug.field("used_negative_operator", used_negative_operator); + } + if *degraded { + debug.field("degraded", degraded); + } + if let Some(semantic_hit_count) = semantic_hit_count { + debug.field("semantic_hit_count", &semantic_hit_count); + } + + debug.finish() + } +} + +struct WeightedScore<'a> { + details: &'a [ScoreDetails], + weight: f64, +} + +impl<'a> WeightedScore<'a> { + pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self { + Self { details, weight } + } + + pub fn weighted_global_score(&self) -> f64 { + ScoreDetails::global_score(self.details.iter()) * self.weight + } + + pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering { + self.weighted_global_score() + .partial_cmp(&other.weighted_global_score()) + // both are numbers, possibly infinite + .unwrap() + } + + pub fn compare(&self, other: &Self) -> Ordering { + let mut left_it = ScoreDetails::score_values(self.details.iter()); + let mut right_it = ScoreDetails::score_values(other.details.iter()); + + loop { + let left = left_it.next(); + let right = right_it.next(); + + match (left, right) { + (None, None) => return Ordering::Equal, + (None, Some(_)) => return Ordering::Less, + (Some(_), None) => return Ordering::Greater, + (Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => { + let left = left * self.weight; + let right = right * other.weight; + if (left - right).abs() <= f64::EPSILON { + continue; + } + return left.partial_cmp(&right).unwrap(); + } + (Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => { + match left.partial_cmp(right) { + Some(Ordering::Equal) => continue, + Some(order) => return order, + None => return self.compare_weighted_global_scores(other), + } + } + (Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => { + match left.partial_cmp(right) { + Some(Ordering::Equal) => continue, + Some(order) => return order, + None => { + return self.compare_weighted_global_scores(other); + } + } + } + // not comparable details, use global + (Some(ScoreValue::Score(_)), Some(_)) + | (Some(_), Some(ScoreValue::Score(_))) + | (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) + | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { + let left_count = left_it.count(); + let right_count = right_it.count(); + // compare how many remaining groups of rules each side has. + // the group with the most remaining groups wins. + return left_count + .cmp(&right_count) + // breaks ties with the global ranking score + .then_with(|| self.compare_weighted_global_scores(other)); + } + } + } + } +} + +struct QueryByIndex { + query: SearchQuery, + federation_options: FederationOptions, + query_index: usize, +} + +struct SearchResultByQuery<'a> { + documents_ids: Vec, + document_scores: Vec>, + federation_options: FederationOptions, + hit_maker: HitMaker<'a>, + query_index: usize, +} + +struct SearchResultByQueryIter<'a> { + it: Zip, IntoIter>>, + federation_options: FederationOptions, + hit_maker: Rc>, + query_index: usize, +} + +impl<'a> SearchResultByQueryIter<'a> { + fn new( + SearchResultByQuery { + documents_ids, + document_scores, + federation_options, + hit_maker, + query_index, + }: SearchResultByQuery<'a>, + ) -> Self { + let it = documents_ids.into_iter().zip(document_scores); + Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index } + } +} + +struct SearchResultByQueryIterItem<'a> { + docid: DocumentId, + score: Vec, + federation_options: FederationOptions, + hit_maker: Rc>, + query_index: usize, +} + +fn merge_index_local_results( + results_by_query: Vec>, +) -> impl Iterator + '_ { + itertools::kmerge_by( + results_by_query.into_iter().map(SearchResultByQueryIter::new), + |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| { + let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); + let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); + + match left_score.compare(&right_score) { + // the biggest score goes first + Ordering::Greater => true, + // break ties using query index + Ordering::Equal => left.query_index < right.query_index, + Ordering::Less => false, + } + }, + ) +} + +fn merge_index_global_results( + results_by_index: Vec, +) -> impl Iterator { + itertools::kmerge_by( + results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()), + |left: &SearchHitByIndex, right: &SearchHitByIndex| { + let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); + let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); + + match left_score.compare(&right_score) { + // the biggest score goes first + Ordering::Greater => true, + // break ties using query index + Ordering::Equal => left.query_index < right.query_index, + Ordering::Less => false, + } + }, + ) +} + +impl<'a> Iterator for SearchResultByQueryIter<'a> { + type Item = SearchResultByQueryIterItem<'a>; + + fn next(&mut self) -> Option { + let (docid, score) = self.it.next()?; + Some(SearchResultByQueryIterItem { + docid, + score, + federation_options: self.federation_options, + hit_maker: Rc::clone(&self.hit_maker), + query_index: self.query_index, + }) + } +} + +struct SearchHitByIndex { + hit: SearchHit, + score: Vec, + federation_options: FederationOptions, + query_index: usize, +} + +struct SearchResultByIndex { + hits: Vec, + candidates: RoaringBitmap, + degraded: bool, + used_negative_operator: bool, +} + +pub fn perform_federated_search( + index_scheduler: &IndexScheduler, + queries: Vec, + federation: Federation, + features: RoFeatures, +) -> Result { + let before_search = std::time::Instant::now(); + + // this implementation partition the queries by index to guarantee an important property: + // - all the queries to a particular index use the same read transaction. + // This is an important property, otherwise we cannot guarantee the self-consistency of the results. + + // 1. partition queries by index + let mut queries_by_index: BTreeMap> = Default::default(); + for (query_index, federated_query) in queries.into_iter().enumerate() { + if let Some(pagination_field) = federated_query.has_pagination() { + return Err(MeilisearchHttpError::PaginationInFederatedQuery( + query_index, + pagination_field, + ) + .into()); + } + + let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); + + queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { + query, + federation_options: federation_options.unwrap_or_default(), + query_index, + }) + } + + // 2. perform queries, merge and make hits index by index + let required_hit_count = federation.limit + federation.offset; + // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic + // Then in step (3), we'll update its value if there is any semantic search + let mut semantic_hit_count = None; + let mut results_by_index = Vec::with_capacity(queries_by_index.len()); + let mut previous_query_data: Option<(RankingRules, usize, String)> = None; + + for (index_uid, queries) in queries_by_index { + let index = match index_scheduler.index(&index_uid) { + Ok(index) => index, + Err(err) => { + let mut err = ResponseError::from(err); + // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but + // here the resource not found is not part of the URL. + err.code = StatusCode::BAD_REQUEST; + if let Some(query) = queries.first() { + err.message = + format!("Inside `.queries[{}]`: {}", query.query_index, err.message); + } + return Err(err); + } + }; + + // Important: this is the only transaction we'll use for this index during this federated search + let rtxn = index.read_txn()?; + + let criteria = index.criteria(&rtxn)?; + + // stuff we need for the hitmaker + let script_lang_map = index.script_language(&rtxn)?; + + let dictionary = index.dictionary(&rtxn)?; + let dictionary: Option> = + dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); + let separators = index.allowed_separators(&rtxn)?; + let separators: Option> = + separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); + + // each query gets its individual cutoff + let cutoff = index.search_cutoff(&rtxn)?; + + let mut degraded = false; + let mut used_negative_operator = false; + let mut candidates = RoaringBitmap::new(); + + // 2.1. Compute all candidates for each query in the index + let mut results_by_query = Vec::with_capacity(queries.len()); + + for QueryByIndex { query, federation_options, query_index } in queries { + // use an immediately invoked lambda to capture the result without returning from the function + + let res: Result<(), ResponseError> = (|| { + let search_kind = search_kind(&query, index_scheduler, &index, features)?; + + let canonicalization_kind = match (&search_kind, &query.q) { + (SearchKind::SemanticOnly { .. }, _) => { + ranking_rules::CanonicalizationKind::Vector + } + (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword, + _ => ranking_rules::CanonicalizationKind::Placeholder, + }; + + let sort = if let Some(sort) = &query.sort { + let sorts: Vec<_> = + match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { + Ok(sorts) => sorts, + Err(asc_desc_error) => { + return Err(milli::Error::from(milli::SortError::from( + asc_desc_error, + )) + .into()) + } + }; + Some(sorts) + } else { + None + }; + + let ranking_rules = ranking_rules::RankingRules::new( + criteria.clone(), + sort, + query.matching_strategy.into(), + canonicalization_kind, + ); + + if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) = + previous_query_data.take() + { + if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) { + return Err(error.to_response_error( + &ranking_rules, + &previous_ranking_rules, + query_index, + previous_query_index, + &index_uid, + &previous_index_uid, + )); + } + previous_query_data = if previous_ranking_rules.constraint_count() + > ranking_rules.constraint_count() + { + Some((previous_ranking_rules, previous_query_index, previous_index_uid)) + } else { + Some((ranking_rules, query_index, index_uid.clone())) + }; + } else { + previous_query_data = Some((ranking_rules, query_index, index_uid.clone())); + } + + match search_kind { + SearchKind::KeywordOnly => {} + _ => semantic_hit_count = Some(0), + } + + let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; + + let time_budget = match cutoff { + Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), + None => TimeBudget::default(), + }; + + let (mut search, _is_finite_pagination, _max_total_hits, _offset) = + prepare_search(&index, &rtxn, &query, &search_kind, time_budget)?; + + search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed); + search.offset(0); + search.limit(required_hit_count); + + let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?; + let format = AttributesFormat { + attributes_to_retrieve: query.attributes_to_retrieve, + retrieve_vectors, + attributes_to_highlight: query.attributes_to_highlight, + attributes_to_crop: query.attributes_to_crop, + crop_length: query.crop_length, + crop_marker: query.crop_marker, + highlight_pre_tag: query.highlight_pre_tag, + highlight_post_tag: query.highlight_post_tag, + show_matches_position: query.show_matches_position, + sort: query.sort, + show_ranking_score: query.show_ranking_score, + show_ranking_score_details: query.show_ranking_score_details, + }; + + let milli::SearchResult { + matching_words, + candidates: query_candidates, + documents_ids, + document_scores, + degraded: query_degraded, + used_negative_operator: query_used_negative_operator, + } = result; + + candidates |= query_candidates; + degraded |= query_degraded; + used_negative_operator |= query_used_negative_operator; + + let tokenizer = HitMaker::tokenizer( + &script_lang_map, + dictionary.as_deref(), + separators.as_deref(), + ); + + let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); + + let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?; + + results_by_query.push(SearchResultByQuery { + federation_options, + hit_maker, + query_index, + documents_ids, + document_scores, + }); + Ok(()) + })(); + + if let Err(mut error) = res { + error.message = format!("Inside `.queries[{query_index}]`: {}", error.message); + return Err(error); + } + } + // 2.2. merge inside index + let mut documents_seen = RoaringBitmap::new(); + let merged_result: Result, ResponseError> = + merge_index_local_results(results_by_query) + // skip documents we've already seen & mark that we saw the current document + .filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid)) + .take(required_hit_count) + // 2.3 make hits + .map( + |SearchResultByQueryIterItem { + docid, + score, + federation_options, + hit_maker, + query_index, + }| { + let mut hit = hit_maker.make_hit(docid, &score)?; + let weighted_score = + ScoreDetails::global_score(score.iter()) * (*federation_options.weight); + + let _federation = serde_json::json!( + { + "indexUid": index_uid, + "queriesPosition": query_index, + "weightedRankingScore": weighted_score, + } + ); + hit.document.insert("_federation".to_string(), _federation); + Ok(SearchHitByIndex { hit, score, federation_options, query_index }) + }, + ) + .collect(); + + let merged_result = merged_result?; + results_by_index.push(SearchResultByIndex { + hits: merged_result, + candidates, + degraded, + used_negative_operator, + }); + } + + // 3. merge hits and metadata across indexes + // 3.1 merge metadata + let (estimated_total_hits, degraded, used_negative_operator) = { + let mut estimated_total_hits = 0; + let mut degraded = false; + let mut used_negative_operator = false; + + for SearchResultByIndex { + hits: _, + candidates, + degraded: degraded_by_index, + used_negative_operator: used_negative_operator_by_index, + } in &results_by_index + { + estimated_total_hits += candidates.len() as usize; + degraded |= *degraded_by_index; + used_negative_operator |= *used_negative_operator_by_index; + } + + (estimated_total_hits, degraded, used_negative_operator) + }; + + // 3.2 merge hits + let merged_hits: Vec<_> = merge_index_global_results(results_by_index) + .skip(federation.offset) + .take(federation.limit) + .inspect(|hit| { + if let Some(semantic_hit_count) = &mut semantic_hit_count { + if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) { + *semantic_hit_count += 1; + } + } + }) + .map(|hit| hit.hit) + .collect(); + + let search_result = FederatedSearchResult { + hits: merged_hits, + processing_time_ms: before_search.elapsed().as_millis(), + hits_info: HitsInfo::OffsetLimit { + limit: federation.limit, + offset: federation.offset, + estimated_total_hits, + }, + semantic_hit_count, + degraded, + used_negative_operator, + }; + + Ok(search_result) +} diff --git a/meilisearch/src/search/federated/ranking_rules.rs b/meilisearch/src/search/federated/ranking_rules.rs new file mode 100644 index 000000000..d31c0ed35 --- /dev/null +++ b/meilisearch/src/search/federated/ranking_rules.rs @@ -0,0 +1,823 @@ +use std::collections::HashMap; +use std::fmt::Write; + +use itertools::Itertools as _; +use meilisearch_types::error::{Code, ResponseError}; +use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy}; + +pub struct RankingRules { + canonical_criteria: Vec, + canonical_sort: Option>, + canonicalization_actions: Vec, + source_criteria: Vec, + source_sort: Option>, +} + +pub enum CanonicalizationAction { + PrependedWords { + prepended_index: RankingRuleSource, + }, + RemovedDuplicate { + earlier_occurrence: RankingRuleSource, + removed_occurrence: RankingRuleSource, + }, + RemovedWords { + reason: RemoveWords, + removed_occurrence: RankingRuleSource, + }, + RemovedPlaceholder { + removed_occurrence: RankingRuleSource, + }, + TruncatedVector { + vector_rule: RankingRuleSource, + truncated_from: RankingRuleSource, + }, + RemovedVector { + vector_rule: RankingRuleSource, + removed_occurrence: RankingRuleSource, + }, + RemovedSort { + removed_occurrence: RankingRuleSource, + }, +} + +pub enum RemoveWords { + WasPrepended, + MatchingStrategyAll, +} + +impl std::fmt::Display for RemoveWords { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let reason = match self { + RemoveWords::WasPrepended => "it was previously prepended", + RemoveWords::MatchingStrategyAll => "`query.matchingWords` is set to `all`", + }; + f.write_str(reason) + } +} + +pub enum CanonicalizationKind { + Placeholder, + Keyword, + Vector, +} + +pub struct CompatibilityError { + previous: RankingRule, + current: RankingRule, +} +impl CompatibilityError { + pub(crate) fn to_response_error( + &self, + ranking_rules: &RankingRules, + previous_ranking_rules: &RankingRules, + query_index: usize, + previous_query_index: usize, + index_uid: &str, + previous_index_uid: &str, + ) -> meilisearch_types::error::ResponseError { + let rule = self.current.as_string( + &ranking_rules.canonical_criteria, + &ranking_rules.canonical_sort, + query_index, + index_uid, + ); + let previous_rule = self.previous.as_string( + &previous_ranking_rules.canonical_criteria, + &previous_ranking_rules.canonical_sort, + previous_query_index, + previous_index_uid, + ); + + let canonicalization_actions = ranking_rules.canonicalization_notes(); + let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes(); + + let mut msg = String::new(); + let reason = self.reason(); + let _ = writeln!( + &mut msg, + "The results of queries #{previous_query_index} and #{query_index} are incompatible: " + ); + let _ = writeln!(&mut msg, " 1. {previous_rule}"); + let _ = writeln!(&mut msg, " 2. {rule}"); + let _ = writeln!(&mut msg, " - {reason}"); + + if !previous_canonicalization_actions.is_empty() { + let _ = write!(&mut msg, " - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}"); + } + + if !canonicalization_actions.is_empty() { + let _ = write!(&mut msg, " - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}"); + } + + ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules) + } + pub fn reason(&self) -> &'static str { + match (self.previous.kind, self.current.kind) { + (RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort) + | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort) + | (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy) + | (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => { + "cannot compare a relevancy rule with a sort rule" + } + + (RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort) + | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort) + | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy) + | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => { + "cannot compare a relevancy rule with a sort rule" + } + + (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort) + | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => { + "cannot compare two sort rules in opposite directions" + } + + (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort) + | (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort) + | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort) + | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort) + | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort) + | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort) + | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingSort) + | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => { + "cannot compare a sort rule with a geosort rule" + } + + (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort) + | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => { + "cannot compare two geosort rules in opposite directions" + } + (RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) + | (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort) + | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort) + | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort) + | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => { + "internal error, comparison should be possible" + } + } + } +} + +impl RankingRules { + pub fn new( + criteria: Vec, + sort: Option>, + terms_matching_strategy: TermsMatchingStrategy, + canonicalization_kind: CanonicalizationKind, + ) -> Self { + let (canonical_criteria, canonical_sort, canonicalization_actions) = + Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind); + Self { + canonical_criteria, + canonical_sort, + canonicalization_actions, + source_criteria: criteria, + source_sort: sort, + } + } + + fn canonicalize( + criteria: &[Criterion], + sort: &Option>, + terms_matching_strategy: TermsMatchingStrategy, + canonicalization_kind: CanonicalizationKind, + ) -> (Vec, Option>, Vec) { + match canonicalization_kind { + CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort), + CanonicalizationKind::Keyword => { + Self::canonicalize_keyword(criteria, sort, terms_matching_strategy) + } + CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort), + } + } + + fn canonicalize_placeholder( + criteria: &[Criterion], + sort_query: &Option>, + ) -> (Vec, Option>, Vec) { + let mut sort = None; + + let mut sorted_fields = HashMap::new(); + let mut canonicalization_actions = Vec::new(); + let mut canonical_criteria = Vec::new(); + let mut canonical_sort = None; + + for (criterion_index, criterion) in criteria.iter().enumerate() { + match criterion.clone() { + Criterion::Words + | Criterion::Typo + | Criterion::Proximity + | Criterion::Attribute + | Criterion::Exactness => { + canonicalization_actions.push(CanonicalizationAction::RemovedPlaceholder { + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + + Criterion::Sort => { + if let Some(previous_index) = sort { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion(previous_index), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else if let Some(sort_query) = sort_query { + sort = Some(criterion_index); + canonical_criteria.push(criterion.clone()); + canonical_sort = Some(canonicalize_sort( + &mut sorted_fields, + sort_query.as_slice(), + criterion_index, + &mut canonicalization_actions, + )); + } else { + canonicalization_actions.push(CanonicalizationAction::RemovedSort { + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + } + Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) { + std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions + .push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: *entry.get(), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }), + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(RankingRuleSource::Criterion(criterion_index)); + canonical_criteria.push(criterion.clone()) + } + }, + } + } + + (canonical_criteria, canonical_sort, canonicalization_actions) + } + + fn canonicalize_vector( + criteria: &[Criterion], + sort_query: &Option>, + ) -> (Vec, Option>, Vec) { + let mut sort = None; + + let mut sorted_fields = HashMap::new(); + let mut canonicalization_actions = Vec::new(); + let mut canonical_criteria = Vec::new(); + let mut canonical_sort = None; + + let mut vector = None; + + 'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() { + match criterion.clone() { + Criterion::Words + | Criterion::Typo + | Criterion::Proximity + | Criterion::Attribute + | Criterion::Exactness => match vector { + Some(previous_occurrence) => { + if sorted_fields.is_empty() { + canonicalization_actions.push(CanonicalizationAction::RemovedVector { + vector_rule: RankingRuleSource::Criterion(previous_occurrence), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else { + canonicalization_actions.push( + CanonicalizationAction::TruncatedVector { + vector_rule: RankingRuleSource::Criterion(previous_occurrence), + truncated_from: RankingRuleSource::Criterion(criterion_index), + }, + ); + break 'criteria; + } + } + None => { + canonical_criteria.push(criterion.clone()); + vector = Some(criterion_index); + } + }, + + Criterion::Sort => { + if let Some(previous_index) = sort { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion(previous_index), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else if let Some(sort_query) = sort_query { + sort = Some(criterion_index); + canonical_criteria.push(criterion.clone()); + canonical_sort = Some(canonicalize_sort( + &mut sorted_fields, + sort_query.as_slice(), + criterion_index, + &mut canonicalization_actions, + )); + } else { + canonicalization_actions.push(CanonicalizationAction::RemovedSort { + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + } + Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) { + std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions + .push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: *entry.get(), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }), + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(RankingRuleSource::Criterion(criterion_index)); + canonical_criteria.push(criterion.clone()) + } + }, + } + } + + (canonical_criteria, canonical_sort, canonicalization_actions) + } + + fn canonicalize_keyword( + criteria: &[Criterion], + sort_query: &Option>, + terms_matching_strategy: TermsMatchingStrategy, + ) -> (Vec, Option>, Vec) { + let mut words = None; + let mut typo = None; + let mut proximity = None; + let mut sort = None; + let mut attribute = None; + let mut exactness = None; + let mut sorted_fields = HashMap::new(); + + let mut canonical_criteria = Vec::new(); + let mut canonical_sort = None; + + let mut canonicalization_actions = Vec::new(); + + for (criterion_index, criterion) in criteria.iter().enumerate() { + let criterion = criterion.clone(); + match criterion.clone() { + Criterion::Words => { + if let TermsMatchingStrategy::All = terms_matching_strategy { + canonicalization_actions.push(CanonicalizationAction::RemovedWords { + reason: RemoveWords::MatchingStrategyAll, + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + continue; + } + if let Some(maybe_previous_index) = words { + if let Some(previous_index) = maybe_previous_index { + canonicalization_actions.push( + CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion( + previous_index, + ), + removed_occurrence: RankingRuleSource::Criterion( + criterion_index, + ), + }, + ); + continue; + } + canonicalization_actions.push(CanonicalizationAction::RemovedWords { + reason: RemoveWords::WasPrepended, + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + words = Some(Some(criterion_index)); + canonical_criteria.push(criterion); + } + Criterion::Typo => { + canonicalize_criterion( + criterion, + criterion_index, + terms_matching_strategy, + &mut words, + &mut canonicalization_actions, + &mut canonical_criteria, + &mut typo, + ); + } + Criterion::Proximity => { + canonicalize_criterion( + criterion, + criterion_index, + terms_matching_strategy, + &mut words, + &mut canonicalization_actions, + &mut canonical_criteria, + &mut proximity, + ); + } + Criterion::Attribute => { + canonicalize_criterion( + criterion, + criterion_index, + terms_matching_strategy, + &mut words, + &mut canonicalization_actions, + &mut canonical_criteria, + &mut attribute, + ); + } + Criterion::Exactness => { + canonicalize_criterion( + criterion, + criterion_index, + terms_matching_strategy, + &mut words, + &mut canonicalization_actions, + &mut canonical_criteria, + &mut exactness, + ); + } + + Criterion::Sort => { + if let Some(previous_index) = sort { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion(previous_index), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else if let Some(sort_query) = sort_query { + sort = Some(criterion_index); + canonical_criteria.push(criterion); + canonical_sort = Some(canonicalize_sort( + &mut sorted_fields, + sort_query.as_slice(), + criterion_index, + &mut canonicalization_actions, + )); + } else { + canonicalization_actions.push(CanonicalizationAction::RemovedSort { + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + } + Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) { + std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions + .push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: *entry.get(), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }), + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(RankingRuleSource::Criterion(criterion_index)); + canonical_criteria.push(criterion) + } + }, + } + } + + (canonical_criteria, canonical_sort, canonicalization_actions) + } + + pub fn is_compatible_with(&self, previous: &Self) -> Result<(), CompatibilityError> { + for (current, previous) in self.coalesce_iterator().zip(previous.coalesce_iterator()) { + if current.kind != previous.kind { + return Err(CompatibilityError { current, previous }); + } + } + Ok(()) + } + + pub fn constraint_count(&self) -> usize { + self.coalesce_iterator().count() + } + + fn coalesce_iterator(&self) -> impl Iterator + '_ { + self.canonical_criteria + .iter() + .enumerate() + .flat_map(|(criterion_index, criterion)| { + RankingRule::from_criterion(criterion_index, criterion, &self.canonical_sort) + }) + .coalesce( + |previous @ RankingRule { source: previous_source, kind: previous_kind }, + current @ RankingRule { source, kind }| { + match (previous_kind, kind) { + (RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) => { + let merged_source = match (previous_source, source) { + ( + RankingRuleSource::Criterion(previous), + RankingRuleSource::Criterion(current), + ) => RankingRuleSource::CoalescedCriteria(previous, current), + ( + RankingRuleSource::CoalescedCriteria(begin, _end), + RankingRuleSource::Criterion(current), + ) => RankingRuleSource::CoalescedCriteria(begin, current), + (_previous, current) => current, + }; + Ok(RankingRule { source: merged_source, kind }) + } + _ => Err((previous, current)), + } + }, + ) + } + + fn canonicalization_notes(&self) -> String { + use CanonicalizationAction::*; + let mut notes = String::new(); + for (index, action) in self.canonicalization_actions.iter().enumerate() { + let index = index + 1; + let _ = match action { + PrependedWords { prepended_index } => writeln!( + &mut notes, + " {index}. Prepended rule `words` before first relevancy rule `{}` at position {}", + prepended_index.rule_name(&self.source_criteria, &self.source_sort), + prepended_index.rule_position() + ), + RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!( + &mut notes, + " {index}. Removed duplicate rule `{}` at position {} as it already appears at position {}", + earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort), + removed_occurrence.rule_position(), + earlier_occurrence.rule_position(), + ), + RemovedWords { reason, removed_occurrence } => writeln!( + &mut notes, + " {index}. Removed rule `words` at position {} because {reason}", + removed_occurrence.rule_position() + ), + RemovedPlaceholder { removed_occurrence } => writeln!( + &mut notes, + " {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")", + removed_occurrence.rule_name(&self.source_criteria, &self.source_sort), + removed_occurrence.rule_position() + ), + TruncatedVector { vector_rule, truncated_from } => writeln!( + &mut notes, + " {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}", + truncated_from.rule_name(&self.source_criteria, &self.source_sort), + truncated_from.rule_position(), + vector_rule.rule_position(), + ), + RemovedVector { vector_rule, removed_occurrence } => writeln!( + &mut notes, + " {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}", + removed_occurrence.rule_name(&self.source_criteria, &self.source_sort), + removed_occurrence.rule_position(), + vector_rule.rule_position(), + ), + RemovedSort { removed_occurrence } => writeln!( + &mut notes, + " {index}. Removed rule `sort` at position {} because `query.sort` is empty", +removed_occurrence.rule_position() + ), + }; + } + notes + } +} + +fn canonicalize_sort( + sorted_fields: &mut HashMap, + sort_query: &[AscDesc], + criterion_index: usize, + canonicalization_actions: &mut Vec, +) -> Vec { + let mut geo_sorted = None; + let mut canonical_sort = Vec::new(); + for (sort_index, asc_desc) in sort_query.iter().enumerate() { + let source = RankingRuleSource::Sort { criterion_index, sort_index }; + let asc_desc = asc_desc.clone(); + match asc_desc.clone() { + AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => { + match sorted_fields.entry(s) { + std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions + .push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: *entry.get(), + removed_occurrence: source, + }), + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(source); + canonical_sort.push(asc_desc); + } + } + } + AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match geo_sorted { + Some(earlier_sort_index) => { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Sort { + criterion_index, + sort_index: earlier_sort_index, + }, + removed_occurrence: source, + }) + } + None => { + geo_sorted = Some(sort_index); + canonical_sort.push(asc_desc); + } + }, + } + } + canonical_sort +} + +fn canonicalize_criterion( + criterion: Criterion, + criterion_index: usize, + terms_matching_strategy: TermsMatchingStrategy, + words: &mut Option>, + canonicalization_actions: &mut Vec, + canonical_criteria: &mut Vec, + rule: &mut Option, +) { + *words = match (terms_matching_strategy, words.take()) { + (TermsMatchingStrategy::All, words) => words, + (_, None) => { + // inject words + canonicalization_actions.push(CanonicalizationAction::PrependedWords { + prepended_index: RankingRuleSource::Criterion(criterion_index), + }); + canonical_criteria.push(Criterion::Words); + Some(None) + } + (_, words) => words, + }; + if let Some(previous_index) = *rule { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion(previous_index), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else { + *rule = Some(criterion_index); + canonical_criteria.push(criterion) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum RankingRuleKind { + Relevancy, + AscendingSort, + DescendingSort, + AscendingGeoSort, + DescendingGeoSort, +} + +#[derive(Debug, Clone, Copy)] +pub struct RankingRule { + source: RankingRuleSource, + kind: RankingRuleKind, +} + +#[derive(Debug, Clone, Copy)] +pub enum RankingRuleSource { + Criterion(usize), + CoalescedCriteria(usize, usize), + Sort { criterion_index: usize, sort_index: usize }, +} + +impl RankingRuleSource { + fn rule_name(&self, criteria: &[Criterion], sort: &Option>) -> String { + match self { + RankingRuleSource::Criterion(criterion_index) => criteria + .get(*criterion_index) + .map(|c| c.to_string()) + .unwrap_or_else(|| "unknown".into()), + RankingRuleSource::CoalescedCriteria(begin, end) => { + let rules: Vec<_> = criteria + .get(*begin..=*end) + .iter() + .flat_map(|c| c.iter()) + .map(|c| c.to_string()) + .collect(); + rules.join(", ") + } + RankingRuleSource::Sort { criterion_index: _, sort_index } => { + match sort.as_deref().and_then(|sort| sort.get(*sort_index)) { + Some(sort) => match sort { + AscDesc::Asc(Member::Field(field_name)) => format!("{field_name}:asc"), + AscDesc::Desc(Member::Field(field_name)) => { + format!("{field_name}:desc") + } + AscDesc::Asc(Member::Geo(_)) => "_geo(..):asc".to_string(), + AscDesc::Desc(Member::Geo(_)) => "_geo(..):desc".to_string(), + }, + None => "unknown".into(), + } + } + } + } + + fn rule_position(&self) -> String { + match self { + RankingRuleSource::Criterion(criterion_index) => { + format!("#{criterion_index} in ranking rules") + } + RankingRuleSource::CoalescedCriteria(begin, end) => { + format!("#{begin} to #{end} in ranking rules") + } + RankingRuleSource::Sort { criterion_index, sort_index } => format!( + "#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)" + ), + } + } +} + +impl RankingRule { + fn from_criterion<'a>( + criterion_index: usize, + criterion: &'a Criterion, + sort: &'a Option>, + ) -> impl Iterator + 'a { + let kind = match criterion { + Criterion::Words + | Criterion::Typo + | Criterion::Proximity + | Criterion::Attribute + | Criterion::Exactness => RankingRuleKind::Relevancy, + Criterion::Asc(s) if s == "_geo" => RankingRuleKind::AscendingGeoSort, + + Criterion::Asc(_) => RankingRuleKind::AscendingSort, + Criterion::Desc(s) if s == "_geo" => RankingRuleKind::DescendingGeoSort, + + Criterion::Desc(_) => RankingRuleKind::DescendingSort, + Criterion::Sort => { + return either::Right(sort.iter().flatten().enumerate().map( + move |(rule_index, asc_desc)| { + Self::from_asc_desc(asc_desc, criterion_index, rule_index) + }, + )) + } + }; + + either::Left(std::iter::once(Self { + source: RankingRuleSource::Criterion(criterion_index), + kind, + })) + } + + fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self { + let kind = match asc_desc { + AscDesc::Asc(Member::Field(_)) => RankingRuleKind::AscendingSort, + AscDesc::Desc(Member::Field(_)) => RankingRuleKind::DescendingSort, + AscDesc::Asc(Member::Geo(_)) => RankingRuleKind::AscendingGeoSort, + AscDesc::Desc(Member::Geo(_)) => RankingRuleKind::DescendingGeoSort, + }; + Self { + source: RankingRuleSource::Sort { + criterion_index: sort_index, + sort_index: rule_index_in_sort, + }, + kind, + } + } + + fn as_string( + &self, + canonical_criteria: &[Criterion], + canonical_sort: &Option>, + query_index: usize, + index_uid: &str, + ) -> String { + let kind = match self.kind { + RankingRuleKind::Relevancy => "relevancy", + RankingRuleKind::AscendingSort => "ascending sort", + RankingRuleKind::DescendingSort => "descending sort", + RankingRuleKind::AscendingGeoSort => "ascending geo sort", + RankingRuleKind::DescendingGeoSort => "descending geo sort", + }; + let rules = self.fetch_from_source(canonical_criteria, canonical_sort); + + let source = match self.source { + RankingRuleSource::Criterion(criterion_index) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"), + RankingRuleSource::CoalescedCriteria(begin, end) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`"), + RankingRuleSource::Sort { criterion_index, sort_index } => format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"), + }; + + format!("{source}: {kind} {rules}") + } + + fn fetch_from_source( + &self, + canonical_criteria: &[Criterion], + canonical_sort: &Option>, + ) -> String { + let rule_name = match self.source { + RankingRuleSource::Criterion(index) => { + canonical_criteria.get(index).map(|criterion| criterion.to_string()) + } + RankingRuleSource::CoalescedCriteria(begin, end) => { + let rules: Vec = canonical_criteria + .get(begin..=end) + .into_iter() + .flat_map(|criteria| criteria.iter()) + .map(|criterion| criterion.to_string()) + .collect(); + + (!rules.is_empty()).then_some(rules.join(", ")) + } + RankingRuleSource::Sort { criterion_index: _, sort_index } => canonical_sort + .as_deref() + .and_then(|canonical_sort| canonical_sort.get(sort_index)) + .and_then(|asc_desc: &AscDesc| match asc_desc { + AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => { + Some(format!("on field `{s}`")) + } + _ => None, + }), + }; + + let rule_name = rule_name.unwrap_or_else(|| "default".into()); + + format!("rule(s) {rule_name}") + } +}