From 67f7470c836150bb41d7926e136915b01d208ead Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Mar 2025 15:42:10 +0100 Subject: [PATCH] Apply PR requests related to Refactor search and facet-search --- crates/meilisearch/tests/search/errors.rs | 10 +- crates/meilisearch/tests/search/filters.rs | 8 +- crates/milli/src/error.rs | 9 +- .../milli/src/filterable_attributes_rules.rs | 32 +++-- crates/milli/src/search/facet/filter.rs | 132 +++++++++++------- 5 files changed, 111 insertions(+), 80 deletions(-) diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index 05d2d2563..0ea121a7d 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -894,7 +894,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`, allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -920,7 +920,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`, allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -941,7 +941,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -967,7 +967,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -993,7 +993,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `TO` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `TO` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs index 818ffabaa..4ee280646 100644 --- a/crates/meilisearch/tests/search/filters.rs +++ b/crates/meilisearch/tests/search/filters.rs @@ -335,7 +335,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -481,7 +481,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`, allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -613,7 +613,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -720,7 +720,7 @@ async fn test_filterable_attributes_priority() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attributes are: `doggos.age`, `doggos.name`.\n1:11 doggos.age > 2", + "message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attributes are: `doggos.name`.\n1:11 doggos.age > 2", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 857a812cd..b34c2bd9a 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -138,8 +138,13 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidFilter(String), #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)] InvalidFilterExpression(&'static [&'static str], Value), - #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`, allowed operators: {}.", allowed_operators.join(", "))] - FilterOperatorNotAllowed { field: String, allowed_operators: Vec, operator: String }, + #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` matched rule #{rule_index} in `filterableAttributes`", allowed_operators.join(", "))] + FilterOperatorNotAllowed { + field: String, + allowed_operators: Vec, + operator: String, + rule_index: usize, + }, #[error("Attribute `{}` is not sortable. {}", .field, match .valid_fields.is_empty() { diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index efef46810..dbc6d72af 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -236,16 +236,13 @@ impl Default for FilterFeatures { pub fn filtered_matching_field_names<'fim>( filterable_attributes: &[FilterableAttributesRule], fields_ids_map: &'fim FieldsIdsMap, - filter: &impl Fn(&FilterableAttributesFeatures) -> bool, + filter: &impl Fn(FilterableAttributesFeatures) -> bool, ) -> BTreeSet<&'fim str> { let mut result = BTreeSet::new(); for (_, field_name) in fields_ids_map.iter() { - for filterable_attribute in filterable_attributes { - if filterable_attribute.match_str(field_name) == PatternMatch::Match { - let features = filterable_attribute.features(); - if filter(&features) { - result.insert(field_name); - } + if let Some((_, features)) = matching_features(field_name, filterable_attributes) { + if filter(features) { + result.insert(field_name); } } } @@ -260,13 +257,18 @@ pub fn filtered_matching_field_names<'fim>( /// /// * `field_name` - The field name to match against. /// * `filterable_attributes` - The set of filterable attributes rules to match against. +/// +/// # Returns +/// +/// * `Some((rule_index, features))` - The features of the matching rule and the index of the rule in the `filterable_attributes` array. +/// * `None` - No matching rule was found. pub fn matching_features( field_name: &str, filterable_attributes: &[FilterableAttributesRule], -) -> Option { - for filterable_attribute in filterable_attributes { +) -> Option<(usize, FilterableAttributesFeatures)> { + for (id, filterable_attribute) in filterable_attributes.iter().enumerate() { if filterable_attribute.match_str(field_name) == PatternMatch::Match { - return Some(filterable_attribute.features()); + return Some((id, filterable_attribute.features())); } } None @@ -283,7 +285,7 @@ pub fn is_field_filterable( filterable_attributes: &[FilterableAttributesRule], ) -> bool { matching_features(field_name, filterable_attributes) - .map_or(false, |features| features.is_filterable()) + .map_or(false, |(_, features)| features.is_filterable()) } /// Check if a field is facet searchable calling the method `FilterableAttributesFeatures::is_facet_searchable()`. @@ -297,7 +299,7 @@ pub fn is_field_facet_searchable( filterable_attributes: &[FilterableAttributesRule], ) -> bool { matching_features(field_name, filterable_attributes) - .map_or(false, |features| features.is_facet_searchable()) + .map_or(false, |(_, features)| features.is_facet_searchable()) } /// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields. @@ -339,7 +341,7 @@ pub fn match_faceted_field( fn match_pattern_by_features( field_name: &str, filterable_attributes: &[FilterableAttributesRule], - filter: &impl Fn(&FilterableAttributesFeatures) -> bool, + filter: &impl Fn(FilterableAttributesFeatures) -> bool, ) -> PatternMatch { let mut selection = PatternMatch::NoMatch; @@ -353,7 +355,7 @@ fn match_pattern_by_features( match pattern.match_str(field_name) { PatternMatch::Match => { let features = pattern.features(); - if filter(&features) && can_match { + if filter(features) && can_match { return PatternMatch::Match; } else { can_match = false; @@ -361,7 +363,7 @@ fn match_pattern_by_features( } PatternMatch::Parent => { let features = pattern.features(); - if filter(&features) { + if filter(features) { selection = PatternMatch::Parent; } } diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index fa3e4ea28..bc7209ef9 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -20,8 +20,9 @@ use crate::heed_codec::facet::{ }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ - distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures, - FilterableAttributesRule, Index, InternalError, Result, SerializationError, + distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap, + FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result, + SerializationError, }; /// The maximum number of filters the filter AST can process. @@ -233,11 +234,11 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { // to avoid doing this for each recursive call we're going to do it ONCE ahead of time + let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; for fid in self.condition.fids(MAX_FILTER_DEPTH) { let attribute = fid.value(); if !is_field_filterable(attribute, &filterable_attributes_rules) { - let fields_ids_map = index.fields_ids_map(rtxn)?; return Err(fid.as_external_error(FilterError::AttributeNotFilterable { attribute, filterable_fields: filtered_matching_field_names( @@ -248,7 +249,7 @@ impl<'a> Filter<'a> { }))?; } } - self.inner_evaluate(rtxn, index, &filterable_attributes_rules, None) + self.inner_evaluate(rtxn, index, &fields_ids_map, &filterable_attributes_rules, None) } fn evaluate_operator( @@ -258,6 +259,7 @@ impl<'a> Filter<'a> { universe: Option<&RoaringBitmap>, operator: &Condition<'a>, features: &FilterableAttributesFeatures, + rule_index: usize, ) -> Result { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; @@ -275,19 +277,29 @@ impl<'a> Filter<'a> { | Condition::Between { .. } if !features.is_filterable_comparison() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::Empty if !features.is_filterable_empty() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::Null if !features.is_filterable_null() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::Exists if !features.is_filterable_exists() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::Equal(_) | Condition::NotEqual(_) if !features.is_filterable_equality() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::GreaterThan(val) => { (Excluded(val.parse_finite_float()?), Included(f64::MAX)) @@ -338,8 +350,9 @@ impl<'a> Filter<'a> { } Condition::NotEqual(val) => { let operator = Condition::Equal(val.clone()); - let docids = - Self::evaluate_operator(rtxn, index, field_id, None, &operator, features)?; + let docids = Self::evaluate_operator( + rtxn, index, field_id, None, &operator, features, rule_index, + )?; let all_ids = index.documents_ids(rtxn)?; return Ok(all_ids - docids); } @@ -441,7 +454,8 @@ impl<'a> Filter<'a> { &self, rtxn: &heed::RoTxn<'_>, index: &Index, - filterable_fields: &[FilterableAttributesRule], + field_ids_map: &FieldsIdsMap, + filterable_attribute_rules: &[FilterableAttributesRule], universe: Option<&RoaringBitmap>, ) -> Result { if universe.map_or(false, |u| u.is_empty()) { @@ -454,7 +468,8 @@ impl<'a> Filter<'a> { &(f.as_ref().clone()).into(), rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; match universe { @@ -466,15 +481,14 @@ impl<'a> Filter<'a> { } } FilterCondition::In { fid, els } => { - match matching_features(fid.value(), filterable_fields) { - Some(features) if features.is_filterable() => { - let field_ids_map = index.fields_ids_map(rtxn)?; + match matching_features(fid.value(), filterable_attribute_rules) { + Some((rule_index, features)) if features.is_filterable() => { if let Some(fid) = field_ids_map.id(fid.value()) { els.iter() .map(|el| Condition::Equal(el.clone())) .map(|op| { Self::evaluate_operator( - rtxn, index, fid, universe, &op, &features, + rtxn, index, fid, universe, &op, &features, rule_index, ) }) .union() @@ -482,46 +496,50 @@ impl<'a> Filter<'a> { Ok(RoaringBitmap::new()) } } - _ => { - let field_ids_map = index.fields_ids_map(rtxn)?; - Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filtered_matching_field_names( - filterable_fields, - &field_ids_map, - &|features| features.is_filterable(), - ), - }))? - } + _ => Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields: filtered_matching_field_names( + filterable_attribute_rules, + &field_ids_map, + &|features| features.is_filterable(), + ), + }))?, } } FilterCondition::Condition { fid, op } => { - match matching_features(fid.value(), filterable_fields) { - Some(features) if features.is_filterable() => { - let field_ids_map = index.fields_ids_map(rtxn)?; + match matching_features(fid.value(), filterable_attribute_rules) { + Some((rule_index, features)) if features.is_filterable() => { if let Some(fid) = field_ids_map.id(fid.value()) { - Self::evaluate_operator(rtxn, index, fid, universe, op, &features) + Self::evaluate_operator( + rtxn, index, fid, universe, op, &features, rule_index, + ) } else { Ok(RoaringBitmap::new()) } } - _ => { - let field_ids_map = index.fields_ids_map(rtxn)?; - Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filtered_matching_field_names( - filterable_fields, - &field_ids_map, - &|features| features.is_filterable(), - ), - }))? - } + _ => Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields: filtered_matching_field_names( + filterable_attribute_rules, + &field_ids_map, + &|features| features.is_filterable(), + ), + }))?, } } FilterCondition::Or(subfilters) => subfilters .iter() .cloned() - .map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe)) + .map(|f| { + Self::inner_evaluate( + &f.into(), + rtxn, + index, + field_ids_map, + filterable_attribute_rules, + universe, + ) + }) .union(), FilterCondition::And(subfilters) => { let mut subfilters_iter = subfilters.iter(); @@ -530,7 +548,8 @@ impl<'a> Filter<'a> { &(first_subfilter.clone()).into(), rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; for f in subfilters_iter { @@ -544,7 +563,8 @@ impl<'a> Filter<'a> { &(f.clone()).into(), rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, Some(&bitmap), )?; } @@ -582,11 +602,10 @@ impl<'a> Filter<'a> { Ok(result) } else { - let field_ids_map = index.fields_ids_map(rtxn)?; Err(point[0].as_external_error(FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, filterable_fields: filtered_matching_field_names( - filterable_fields, + filterable_attribute_rules, &field_ids_map, &|features| features.is_filterable(), ), @@ -649,7 +668,8 @@ impl<'a> Filter<'a> { let selected_lat = Filter { condition: condition_lat }.inner_evaluate( rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; @@ -682,7 +702,8 @@ impl<'a> Filter<'a> { let left = Filter { condition: condition_left }.inner_evaluate( rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; @@ -696,7 +717,8 @@ impl<'a> Filter<'a> { let right = Filter { condition: condition_right }.inner_evaluate( rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; @@ -712,19 +734,19 @@ impl<'a> Filter<'a> { Filter { condition: condition_lng }.inner_evaluate( rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )? }; Ok(selected_lat & selected_lng) } else { - let field_ids_map = index.fields_ids_map(rtxn)?; Err(top_right_point[0].as_external_error( FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, filterable_fields: filtered_matching_field_names( - filterable_fields, + filterable_attribute_rules, &field_ids_map, &|features| features.is_filterable(), ), @@ -742,6 +764,7 @@ fn generate_filter_error( field_id: FieldId, operator: &Condition<'_>, features: &FilterableAttributesFeatures, + rule_index: usize, ) -> Error { match index.fields_ids_map(rtxn) { Ok(fields_ids_map) => { @@ -750,6 +773,7 @@ fn generate_filter_error( field: field.to_string(), allowed_operators: features.allowed_filter_operators(), operator: operator.operator().to_string(), + rule_index, }) } Err(e) => e.into(),