Start with a filterBoosting ranking rule when boostingFilter is present

This commit is contained in:
Clément Renault 2023-11-28 16:09:51 +01:00
parent f2041fd78c
commit b43edabbcd
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
13 changed files with 80 additions and 109 deletions

View File

@ -332,6 +332,7 @@ impl ErrorCode for milli::Error {
UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
UserError::InvalidBoostingFilter(_) => Code::InvalidSearchBoostingFilter,
UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {

View File

@ -650,7 +650,7 @@ impl From<RankingRule> for RankingRuleView {
fn from(value: RankingRule) -> Self {
match value {
RankingRule::Words => RankingRuleView::Words,
RankingRule::Boost(filter) => RankingRuleView::Boost(filter),
RankingRule::FilterBoosting(filter) => RankingRuleView::Boost(filter),
RankingRule::Typo => RankingRuleView::Typo,
RankingRule::Proximity => RankingRuleView::Proximity,
RankingRule::Attribute => RankingRuleView::Attribute,
@ -665,7 +665,7 @@ impl From<RankingRuleView> for RankingRule {
fn from(value: RankingRuleView) -> Self {
match value {
RankingRuleView::Words => RankingRule::Words,
RankingRuleView::Boost(filter) => RankingRule::Boost(filter),
RankingRuleView::Boost(filter) => RankingRule::FilterBoosting(filter),
RankingRuleView::Typo => RankingRule::Typo,
RankingRuleView::Proximity => RankingRule::Proximity,
RankingRuleView::Attribute => RankingRule::Attribute,

View File

@ -58,6 +58,7 @@ fn main() -> Result<(), Box<dyn Error>> {
false,
&None,
&None,
&None,
GeoSortStrategy::default(),
0,
20,

View File

@ -1,38 +0,0 @@
//! This module provides the `Boost` type and defines all the errors related to this type.
use std::str::FromStr;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::RankingRuleError;
/// This error type is never supposed to be shown to the end user.
/// You must always cast it to a sort error or a criterion error.
#[derive(Error, Debug)]
pub enum BoostError {
#[error("Invalid syntax for the boost parameter: expected expression starting with `boost:`, found `{name}`.")]
InvalidSyntax { name: String },
}
impl From<BoostError> for RankingRuleError {
fn from(error: BoostError) -> Self {
match error {
BoostError::InvalidSyntax { name } => RankingRuleError::InvalidName { name },
}
}
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Boost(pub String);
impl FromStr for Boost {
type Err = BoostError;
fn from_str(text: &str) -> Result<Boost, Self::Err> {
match text.split_once(':') {
Some(("boost", right)) => Ok(Boost(right.to_string())), // TODO check filter validity
_ => Err(BoostError::InvalidSyntax { name: text.to_string() }),
}
}
}

View File

@ -116,6 +116,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidVectorsType { document_id: Value, value: Value },
#[error("{0}")]
InvalidFilter(String),
#[error("{0}")]
InvalidBoostingFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
InvalidFilterExpression(&'static [&'static str], Value),
#[error("Attribute `{}` is not sortable. {}",

View File

@ -9,7 +9,6 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
pub mod documents;
mod asc_desc;
mod boost;
pub mod distance;
mod error;
mod external_documents_ids;

View File

@ -4,8 +4,7 @@ use std::str::FromStr;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::boost::{Boost, BoostError};
use crate::{AscDesc, AscDescError, Member};
use crate::{AscDesc, Member};
#[derive(Error, Debug)]
pub enum RankingRuleError {
@ -27,11 +26,11 @@ pub enum RankingRuleError {
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum RankingRule {
/// Sorted by documents matching the given filter and then documents not matching it.
FilterBoosting(String),
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
Words,
/// Sorted by documents matching the given filter and then documents not matching it.
Boost(String),
/// Sorted by increasing number of typos.
Typo,
/// Sorted by increasing distance between matched query terms.
@ -71,8 +70,8 @@ impl FromStr for RankingRule {
"attribute" => Ok(RankingRule::Attribute),
"sort" => Ok(RankingRule::Sort),
"exactness" => Ok(RankingRule::Exactness),
text => match (AscDesc::from_str(text), Boost::from_str(text)) {
(Ok(asc_desc), _) => match asc_desc {
text => match AscDesc::from_str(text) {
Ok(asc_desc) => match asc_desc {
AscDesc::Asc(Member::Field(field)) => Ok(RankingRule::Asc(field)),
AscDesc::Desc(Member::Field(field)) => Ok(RankingRule::Desc(field)),
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
@ -81,15 +80,7 @@ impl FromStr for RankingRule {
})?
}
},
(_, Ok(Boost(filter))) => Ok(RankingRule::Boost(filter)),
(
Err(AscDescError::InvalidSyntax { name: asc_desc_name }),
Err(BoostError::InvalidSyntax { name: boost_name }),
) => Err(RankingRuleError::InvalidName {
// TODO improve the error message quality
name: format!("{asc_desc_name} {boost_name}"),
}),
(Err(asc_desc_error), _) => Err(asc_desc_error.into()),
Err(err) => Err(err.into()),
},
}
}
@ -112,7 +103,7 @@ impl fmt::Display for RankingRule {
match self {
Words => f.write_str("words"),
Boost(filter) => write!(f, "boost:{filter}"),
FilterBoosting(_) => write!(f, "filterBoosting"),
Typo => f.write_str("typo"),
Proximity => f.write_str("proximity"),
Attribute => f.write_str("attribute"),

View File

@ -5,7 +5,7 @@ use crate::distance_between_two_points;
#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
Words(Words),
Boost(Boost),
FilterBoosting(FilterBoosting),
Typo(Typo),
Proximity(Rank),
Fid(Rank),
@ -24,7 +24,7 @@ impl ScoreDetails {
pub fn rank(&self) -> Option<Rank> {
match self {
ScoreDetails::Words(details) => Some(details.rank()),
ScoreDetails::Boost(_) => None,
ScoreDetails::FilterBoosting(_) => None,
ScoreDetails::Typo(details) => Some(details.rank()),
ScoreDetails::Proximity(details) => Some(*details),
ScoreDetails::Fid(details) => Some(*details),
@ -62,12 +62,9 @@ impl ScoreDetails {
details_map.insert("words".into(), words_details);
order += 1;
}
ScoreDetails::Boost(Boost { filter, matching }) => {
let sort = format!("boost:{}", filter);
let sort_details = serde_json::json!({
"value": matching,
});
details_map.insert(sort, sort_details);
ScoreDetails::FilterBoosting(FilterBoosting { matching }) => {
let sort_details = serde_json::json!({ "matching": matching });
details_map.insert("filterBoosting".into(), sort_details);
order += 1;
}
ScoreDetails::Typo(typo) => {
@ -232,8 +229,7 @@ impl Words {
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Boost {
pub filter: String,
pub struct FilterBoosting {
pub matching: bool,
}

View File

@ -38,6 +38,7 @@ pub struct Search<'a> {
vector: Option<Vec<f32>>,
// this should be linked to the String in the query
filter: Option<Filter<'a>>,
boosting_filter: Option<Filter<'a>>,
offset: usize,
limit: usize,
sort_criteria: Option<Vec<AscDesc>>,
@ -57,6 +58,7 @@ impl<'a> Search<'a> {
query: None,
vector: None,
filter: None,
boosting_filter: None,
offset: 0,
limit: 20,
sort_criteria: None,
@ -121,6 +123,11 @@ impl<'a> Search<'a> {
self
}
pub fn boosting_filter(&mut self, condition: Filter<'a>) -> &mut Search<'a> {
self.boosting_filter = Some(condition);
self
}
#[cfg(test)]
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
self.geo_strategy = strategy;
@ -150,6 +157,7 @@ impl<'a> Search<'a> {
self.scoring_strategy,
self.exhaustive_number_hits,
&self.filter,
&self.boosting_filter,
&self.sort_criteria,
self.geo_strategy,
self.offset,
@ -175,6 +183,7 @@ impl fmt::Debug for Search<'_> {
query,
vector: _,
filter,
boosting_filter,
offset,
limit,
sort_criteria,
@ -191,6 +200,7 @@ impl fmt::Debug for Search<'_> {
.field("query", query)
.field("vector", &"[...]")
.field("filter", filter)
.field("boosting_filter", boosting_filter)
.field("offset", offset)
.field("limit", limit)
.field("sort_criteria", sort_criteria)

View File

@ -5,29 +5,26 @@ use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext
use crate::score_details::{self, ScoreDetails};
use crate::{Filter, Result};
pub struct Boost<Query> {
original_expression: String,
pub struct FilterBoosting<'f, Query> {
filter: Filter<'f>,
original_query: Option<Query>,
matching: Option<RankingRuleOutput<Query>>,
non_matching: Option<RankingRuleOutput<Query>>,
}
impl<Query> Boost<Query> {
pub fn new(expression: String) -> Result<Self> {
Ok(Self {
original_expression: expression,
original_query: None,
matching: None,
non_matching: None,
})
impl<'f, Query> FilterBoosting<'f, Query> {
pub fn new(filter: Filter<'f>) -> Result<Self> {
Ok(Self { filter, original_query: None, matching: None, non_matching: None })
}
}
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Boost<Query> {
impl<'ctx, 'f, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query>
for FilterBoosting<'f, Query>
{
fn id(&self) -> String {
// TODO improve this
let Self { original_expression, .. } = self;
format!("boost:{original_expression}")
let Self { filter: original_expression, .. } = self;
format!("boost:{original_expression:?}")
}
fn start_iteration(
@ -37,9 +34,9 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Boost<Quer
parent_candidates: &RoaringBitmap,
parent_query: &Query,
) -> Result<()> {
let universe_matching = match Filter::from_str(&self.original_expression)? {
Some(filter) => filter.evaluate(ctx.txn, ctx.index)?,
None => RoaringBitmap::default(),
let universe_matching = match self.filter.evaluate(ctx.txn, ctx.index) {
Ok(documents) => documents,
Err(e) => return Err(e), // TODO manage the invalid_search_boosting_filter
};
let matching = parent_candidates & universe_matching;
let non_matching = parent_candidates - &matching;
@ -49,19 +46,13 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Boost<Quer
self.matching = Some(RankingRuleOutput {
query: parent_query.clone(),
candidates: matching,
score: ScoreDetails::Boost(score_details::Boost {
filter: self.original_expression.clone(),
matching: true,
}),
score: ScoreDetails::FilterBoosting(score_details::FilterBoosting { matching: true }),
});
self.non_matching = Some(RankingRuleOutput {
query: parent_query.clone(),
candidates: non_matching,
score: ScoreDetails::Boost(score_details::Boost {
filter: self.original_expression.clone(),
matching: false,
}),
score: ScoreDetails::FilterBoosting(score_details::FilterBoosting { matching: false }),
});
Ok(())

View File

@ -512,6 +512,7 @@ mod tests {
false,
&None,
&None,
&None,
crate::search::new::GeoSortStrategy::default(),
0,
100,

View File

@ -14,8 +14,8 @@ mod ranking_rules;
mod resolve_query_graph;
mod small_bitmap;
mod boost;
mod exact_attribute;
mod filter_boosting;
mod sort;
#[cfg(test)]
@ -23,11 +23,11 @@ mod tests;
use std::collections::HashSet;
use boost::Boost;
use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::TokenizerBuilder;
use db_cache::DatabaseCache;
use exact_attribute::ExactAttribute;
use filter_boosting::FilterBoosting;
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
use heed::RoTxn;
use instant_distance::Search;
@ -192,25 +192,29 @@ fn resolve_universe(
}
/// Return the list of initialised ranking rules to be used for a placeholder search.
fn get_ranking_rules_for_placeholder_search<'ctx>(
fn get_ranking_rules_for_placeholder_search<'ctx, 'f: 'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
boosting_filter: &Option<Filter<'f>>,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
let mut sort = false;
let mut sorted_fields = HashSet::new();
let mut geo_sorted = false;
let mut ranking_rules: Vec<BoxRankingRule<PlaceholderQuery>> = vec![];
let mut ranking_rules: Vec<BoxRankingRule<_>> = match boosting_filter {
Some(filter) => vec![Box::new(FilterBoosting::new(filter.clone())?)],
None => Vec::new(),
};
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
match rr {
// These rules need a query to have an effect; ignore them in placeholder search
crate::RankingRule::Words
crate::RankingRule::FilterBoosting(_)
| crate::RankingRule::Words
| crate::RankingRule::Typo
| crate::RankingRule::Attribute
| crate::RankingRule::Proximity
| crate::RankingRule::Exactness => continue,
crate::RankingRule::Boost(filter) => ranking_rules.push(Box::new(Boost::new(filter)?)),
crate::RankingRule::Sort => {
if sort {
continue;
@ -245,11 +249,12 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
}
/// Return the list of initialised ranking rules to be used for a query graph search.
fn get_ranking_rules_for_query_graph_search<'ctx>(
fn get_ranking_rules_for_query_graph_search<'ctx, 'f: 'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
terms_matching_strategy: TermsMatchingStrategy,
boosting_filter: &Option<Filter<'f>>,
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
// query graph search
let mut words = false;
@ -266,7 +271,10 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
words = true;
}
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = match boosting_filter {
Some(filter) => vec![Box::new(FilterBoosting::new(filter.clone())?)],
None => Vec::new(),
};
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
// Add Words before any of: typo, proximity, attribute
@ -290,8 +298,10 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
words = true;
}
crate::RankingRule::Boost(filter) => {
ranking_rules.push(Box::new(Boost::new(filter)?));
crate::RankingRule::FilterBoosting(_) => {
// that is not possible to define the filterBoosting ranking rule by hand,
// or by using the seetings. It is always inserted by the engine itself.
continue;
}
crate::RankingRule::Typo => {
if typo {
@ -413,14 +423,15 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
}
#[allow(clippy::too_many_arguments)]
pub fn execute_search(
ctx: &mut SearchContext,
pub fn execute_search<'ctx, 'f: 'ctx>(
ctx: &mut SearchContext<'ctx>,
query: &Option<String>,
vector: &Option<Vec<f32>>,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
exhaustive_number_hits: bool,
filters: &Option<Filter>,
filter: &Option<Filter>,
boosting_filter: &Option<Filter<'f>>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
from: usize,
@ -429,8 +440,8 @@ pub fn execute_search(
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
) -> Result<PartialSearchResult> {
let mut universe = if let Some(filters) = filters {
filters.evaluate(ctx.txn, ctx.index)?
let mut universe = if let Some(filter) = filter {
filter.evaluate(ctx.txn, ctx.index)?
} else {
ctx.index.documents_ids(ctx.txn)?
};
@ -523,6 +534,7 @@ pub fn execute_search(
sort_criteria,
geo_strategy,
terms_matching_strategy,
boosting_filter,
)?;
universe =
@ -539,8 +551,13 @@ pub fn execute_search(
query_graph_logger,
)?
} else {
let ranking_rules =
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
let ranking_rules = get_ranking_rules_for_placeholder_search(
ctx,
sort_criteria,
geo_strategy,
boosting_filter,
)?;
bucket_sort(
ctx,
ranking_rules,

View File

@ -147,7 +147,7 @@ pub fn expected_order(
new_groups
.extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from));
}
RankingRule::Boost(filter) => {
RankingRule::FilterBoosting(filter) => {
// move the matching documents first, then the ones that don't match
group.sort_by_key(|d| if execute_filter(filter, d).is_some() { 0 } else { 1 });
new_groups.extend(