diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 60a5d7257..0a4f1ad8a 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -332,6 +332,7 @@ impl ErrorCode for milli::Error { UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached, UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::InvalidSearchFilter, + UserError::InvalidBoostingFilter(_) => Code::InvalidSearchBoostingFilter, UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index 9c390ba7a..12adae3b3 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -650,7 +650,7 @@ impl From for RankingRuleView { fn from(value: RankingRule) -> Self { match value { RankingRule::Words => RankingRuleView::Words, - RankingRule::Boost(filter) => RankingRuleView::Boost(filter), + RankingRule::FilterBoosting(filter) => RankingRuleView::Boost(filter), RankingRule::Typo => RankingRuleView::Typo, RankingRule::Proximity => RankingRuleView::Proximity, RankingRule::Attribute => RankingRuleView::Attribute, @@ -665,7 +665,7 @@ impl From for RankingRule { fn from(value: RankingRuleView) -> Self { match value { RankingRuleView::Words => RankingRule::Words, - RankingRuleView::Boost(filter) => RankingRule::Boost(filter), + RankingRuleView::Boost(filter) => RankingRule::FilterBoosting(filter), RankingRuleView::Typo => RankingRule::Typo, RankingRuleView::Proximity => RankingRule::Proximity, RankingRuleView::Attribute => RankingRule::Attribute, diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 82de56434..04a2c5de3 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -58,6 +58,7 @@ fn main() -> Result<(), Box> { false, &None, &None, + &None, GeoSortStrategy::default(), 0, 20, diff --git a/milli/src/boost.rs b/milli/src/boost.rs deleted file mode 100644 index f90a68ee2..000000000 --- a/milli/src/boost.rs +++ /dev/null @@ -1,38 +0,0 @@ -//! This module provides the `Boost` type and defines all the errors related to this type. - -use std::str::FromStr; - -use serde::{Deserialize, Serialize}; -use thiserror::Error; - -use crate::RankingRuleError; - -/// This error type is never supposed to be shown to the end user. -/// You must always cast it to a sort error or a criterion error. -#[derive(Error, Debug)] -pub enum BoostError { - #[error("Invalid syntax for the boost parameter: expected expression starting with `boost:`, found `{name}`.")] - InvalidSyntax { name: String }, -} - -impl From for RankingRuleError { - fn from(error: BoostError) -> Self { - match error { - BoostError::InvalidSyntax { name } => RankingRuleError::InvalidName { name }, - } - } -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct Boost(pub String); - -impl FromStr for Boost { - type Err = BoostError; - - fn from_str(text: &str) -> Result { - match text.split_once(':') { - Some(("boost", right)) => Ok(Boost(right.to_string())), // TODO check filter validity - _ => Err(BoostError::InvalidSyntax { name: text.to_string() }), - } - } -} diff --git a/milli/src/error.rs b/milli/src/error.rs index 710031c30..5fcc57653 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -116,6 +116,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco InvalidVectorsType { document_id: Value, value: Value }, #[error("{0}")] InvalidFilter(String), + #[error("{0}")] + InvalidBoostingFilter(String), #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))] InvalidFilterExpression(&'static [&'static str], Value), #[error("Attribute `{}` is not sortable. {}", diff --git a/milli/src/lib.rs b/milli/src/lib.rs index fb6201f1f..e9cef7d48 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -9,7 +9,6 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; pub mod documents; mod asc_desc; -mod boost; pub mod distance; mod error; mod external_documents_ids; diff --git a/milli/src/ranking_rule.rs b/milli/src/ranking_rule.rs index 18b8fc9a2..ed4edd62b 100644 --- a/milli/src/ranking_rule.rs +++ b/milli/src/ranking_rule.rs @@ -4,8 +4,7 @@ use std::str::FromStr; use serde::{Deserialize, Serialize}; use thiserror::Error; -use crate::boost::{Boost, BoostError}; -use crate::{AscDesc, AscDescError, Member}; +use crate::{AscDesc, Member}; #[derive(Error, Debug)] pub enum RankingRuleError { @@ -27,11 +26,11 @@ pub enum RankingRuleError { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] pub enum RankingRule { + /// Sorted by documents matching the given filter and then documents not matching it. + FilterBoosting(String), /// Sorted by decreasing number of matched query terms. /// Query words at the front of an attribute is considered better than if it was at the back. Words, - /// Sorted by documents matching the given filter and then documents not matching it. - Boost(String), /// Sorted by increasing number of typos. Typo, /// Sorted by increasing distance between matched query terms. @@ -71,8 +70,8 @@ impl FromStr for RankingRule { "attribute" => Ok(RankingRule::Attribute), "sort" => Ok(RankingRule::Sort), "exactness" => Ok(RankingRule::Exactness), - text => match (AscDesc::from_str(text), Boost::from_str(text)) { - (Ok(asc_desc), _) => match asc_desc { + text => match AscDesc::from_str(text) { + Ok(asc_desc) => match asc_desc { AscDesc::Asc(Member::Field(field)) => Ok(RankingRule::Asc(field)), AscDesc::Desc(Member::Field(field)) => Ok(RankingRule::Desc(field)), AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => { @@ -81,15 +80,7 @@ impl FromStr for RankingRule { })? } }, - (_, Ok(Boost(filter))) => Ok(RankingRule::Boost(filter)), - ( - Err(AscDescError::InvalidSyntax { name: asc_desc_name }), - Err(BoostError::InvalidSyntax { name: boost_name }), - ) => Err(RankingRuleError::InvalidName { - // TODO improve the error message quality - name: format!("{asc_desc_name} {boost_name}"), - }), - (Err(asc_desc_error), _) => Err(asc_desc_error.into()), + Err(err) => Err(err.into()), }, } } @@ -112,7 +103,7 @@ impl fmt::Display for RankingRule { match self { Words => f.write_str("words"), - Boost(filter) => write!(f, "boost:{filter}"), + FilterBoosting(_) => write!(f, "filterBoosting"), Typo => f.write_str("typo"), Proximity => f.write_str("proximity"), Attribute => f.write_str("attribute"), diff --git a/milli/src/score_details.rs b/milli/src/score_details.rs index 4a264143d..251733421 100644 --- a/milli/src/score_details.rs +++ b/milli/src/score_details.rs @@ -5,7 +5,7 @@ use crate::distance_between_two_points; #[derive(Debug, Clone, PartialEq)] pub enum ScoreDetails { Words(Words), - Boost(Boost), + FilterBoosting(FilterBoosting), Typo(Typo), Proximity(Rank), Fid(Rank), @@ -24,7 +24,7 @@ impl ScoreDetails { pub fn rank(&self) -> Option { match self { ScoreDetails::Words(details) => Some(details.rank()), - ScoreDetails::Boost(_) => None, + ScoreDetails::FilterBoosting(_) => None, ScoreDetails::Typo(details) => Some(details.rank()), ScoreDetails::Proximity(details) => Some(*details), ScoreDetails::Fid(details) => Some(*details), @@ -62,12 +62,9 @@ impl ScoreDetails { details_map.insert("words".into(), words_details); order += 1; } - ScoreDetails::Boost(Boost { filter, matching }) => { - let sort = format!("boost:{}", filter); - let sort_details = serde_json::json!({ - "value": matching, - }); - details_map.insert(sort, sort_details); + ScoreDetails::FilterBoosting(FilterBoosting { matching }) => { + let sort_details = serde_json::json!({ "matching": matching }); + details_map.insert("filterBoosting".into(), sort_details); order += 1; } ScoreDetails::Typo(typo) => { @@ -232,8 +229,7 @@ impl Words { } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Boost { - pub filter: String, +pub struct FilterBoosting { pub matching: bool, } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 2c78acfdf..13898e366 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -38,6 +38,7 @@ pub struct Search<'a> { vector: Option>, // this should be linked to the String in the query filter: Option>, + boosting_filter: Option>, offset: usize, limit: usize, sort_criteria: Option>, @@ -57,6 +58,7 @@ impl<'a> Search<'a> { query: None, vector: None, filter: None, + boosting_filter: None, offset: 0, limit: 20, sort_criteria: None, @@ -121,6 +123,11 @@ impl<'a> Search<'a> { self } + pub fn boosting_filter(&mut self, condition: Filter<'a>) -> &mut Search<'a> { + self.boosting_filter = Some(condition); + self + } + #[cfg(test)] pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> { self.geo_strategy = strategy; @@ -150,6 +157,7 @@ impl<'a> Search<'a> { self.scoring_strategy, self.exhaustive_number_hits, &self.filter, + &self.boosting_filter, &self.sort_criteria, self.geo_strategy, self.offset, @@ -175,6 +183,7 @@ impl fmt::Debug for Search<'_> { query, vector: _, filter, + boosting_filter, offset, limit, sort_criteria, @@ -191,6 +200,7 @@ impl fmt::Debug for Search<'_> { .field("query", query) .field("vector", &"[...]") .field("filter", filter) + .field("boosting_filter", boosting_filter) .field("offset", offset) .field("limit", limit) .field("sort_criteria", sort_criteria) diff --git a/milli/src/search/new/boost.rs b/milli/src/search/new/filter_boosting.rs similarity index 62% rename from milli/src/search/new/boost.rs rename to milli/src/search/new/filter_boosting.rs index f1a001a4c..15398cbc0 100644 --- a/milli/src/search/new/boost.rs +++ b/milli/src/search/new/filter_boosting.rs @@ -5,29 +5,26 @@ use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext use crate::score_details::{self, ScoreDetails}; use crate::{Filter, Result}; -pub struct Boost { - original_expression: String, +pub struct FilterBoosting<'f, Query> { + filter: Filter<'f>, original_query: Option, matching: Option>, non_matching: Option>, } -impl Boost { - pub fn new(expression: String) -> Result { - Ok(Self { - original_expression: expression, - original_query: None, - matching: None, - non_matching: None, - }) +impl<'f, Query> FilterBoosting<'f, Query> { + pub fn new(filter: Filter<'f>) -> Result { + Ok(Self { filter, original_query: None, matching: None, non_matching: None }) } } -impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Boost { +impl<'ctx, 'f, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> + for FilterBoosting<'f, Query> +{ fn id(&self) -> String { // TODO improve this - let Self { original_expression, .. } = self; - format!("boost:{original_expression}") + let Self { filter: original_expression, .. } = self; + format!("boost:{original_expression:?}") } fn start_iteration( @@ -37,9 +34,9 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Boost Result<()> { - let universe_matching = match Filter::from_str(&self.original_expression)? { - Some(filter) => filter.evaluate(ctx.txn, ctx.index)?, - None => RoaringBitmap::default(), + let universe_matching = match self.filter.evaluate(ctx.txn, ctx.index) { + Ok(documents) => documents, + Err(e) => return Err(e), // TODO manage the invalid_search_boosting_filter }; let matching = parent_candidates & universe_matching; let non_matching = parent_candidates - &matching; @@ -49,19 +46,13 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Boost( +fn get_ranking_rules_for_placeholder_search<'ctx, 'f: 'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, geo_strategy: geo_sort::Strategy, + boosting_filter: &Option>, ) -> Result>> { let mut sort = false; let mut sorted_fields = HashSet::new(); let mut geo_sorted = false; - let mut ranking_rules: Vec> = vec![]; + let mut ranking_rules: Vec> = match boosting_filter { + Some(filter) => vec![Box::new(FilterBoosting::new(filter.clone())?)], + None => Vec::new(), + }; let settings_ranking_rules = ctx.index.criteria(ctx.txn)?; for rr in settings_ranking_rules { match rr { // These rules need a query to have an effect; ignore them in placeholder search - crate::RankingRule::Words + crate::RankingRule::FilterBoosting(_) + | crate::RankingRule::Words | crate::RankingRule::Typo | crate::RankingRule::Attribute | crate::RankingRule::Proximity | crate::RankingRule::Exactness => continue, - crate::RankingRule::Boost(filter) => ranking_rules.push(Box::new(Boost::new(filter)?)), crate::RankingRule::Sort => { if sort { continue; @@ -245,11 +249,12 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( } /// Return the list of initialised ranking rules to be used for a query graph search. -fn get_ranking_rules_for_query_graph_search<'ctx>( +fn get_ranking_rules_for_query_graph_search<'ctx, 'f: 'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, geo_strategy: geo_sort::Strategy, terms_matching_strategy: TermsMatchingStrategy, + boosting_filter: &Option>, ) -> Result>> { // query graph search let mut words = false; @@ -266,7 +271,10 @@ fn get_ranking_rules_for_query_graph_search<'ctx>( words = true; } - let mut ranking_rules: Vec> = vec![]; + let mut ranking_rules: Vec> = match boosting_filter { + Some(filter) => vec![Box::new(FilterBoosting::new(filter.clone())?)], + None => Vec::new(), + }; let settings_ranking_rules = ctx.index.criteria(ctx.txn)?; for rr in settings_ranking_rules { // Add Words before any of: typo, proximity, attribute @@ -290,8 +298,10 @@ fn get_ranking_rules_for_query_graph_search<'ctx>( ranking_rules.push(Box::new(Words::new(terms_matching_strategy))); words = true; } - crate::RankingRule::Boost(filter) => { - ranking_rules.push(Box::new(Boost::new(filter)?)); + crate::RankingRule::FilterBoosting(_) => { + // that is not possible to define the filterBoosting ranking rule by hand, + // or by using the seetings. It is always inserted by the engine itself. + continue; } crate::RankingRule::Typo => { if typo { @@ -413,14 +423,15 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( } #[allow(clippy::too_many_arguments)] -pub fn execute_search( - ctx: &mut SearchContext, +pub fn execute_search<'ctx, 'f: 'ctx>( + ctx: &mut SearchContext<'ctx>, query: &Option, vector: &Option>, terms_matching_strategy: TermsMatchingStrategy, scoring_strategy: ScoringStrategy, exhaustive_number_hits: bool, - filters: &Option, + filter: &Option, + boosting_filter: &Option>, sort_criteria: &Option>, geo_strategy: geo_sort::Strategy, from: usize, @@ -429,8 +440,8 @@ pub fn execute_search( placeholder_search_logger: &mut dyn SearchLogger, query_graph_logger: &mut dyn SearchLogger, ) -> Result { - let mut universe = if let Some(filters) = filters { - filters.evaluate(ctx.txn, ctx.index)? + let mut universe = if let Some(filter) = filter { + filter.evaluate(ctx.txn, ctx.index)? } else { ctx.index.documents_ids(ctx.txn)? }; @@ -523,6 +534,7 @@ pub fn execute_search( sort_criteria, geo_strategy, terms_matching_strategy, + boosting_filter, )?; universe = @@ -539,8 +551,13 @@ pub fn execute_search( query_graph_logger, )? } else { - let ranking_rules = - get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?; + let ranking_rules = get_ranking_rules_for_placeholder_search( + ctx, + sort_criteria, + geo_strategy, + boosting_filter, + )?; + bucket_sort( ctx, ranking_rules, diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index 2dd413ad7..60cf00f7b 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -147,7 +147,7 @@ pub fn expected_order( new_groups .extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from)); } - RankingRule::Boost(filter) => { + RankingRule::FilterBoosting(filter) => { // move the matching documents first, then the ones that don't match group.sort_by_key(|d| if execute_filter(filter, d).is_some() { 0 } else { 1 }); new_groups.extend(