From 967033579dd408c71b7056457336b324adea40ad Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 10:25:32 +0100 Subject: [PATCH] Refactor search and facet-search **Changes:** The search filters are now using the FilterableAttributesFeatures from the FilterableAttributesRules to know if a field is filterable. Moreover, the FilterableAttributesFeatures is more precise and an error will be returned if an operator is used on a field that doesn't have the related feature. The facet-search is now checking if the feature is allowed in the FilterableAttributesFeatures and an error will be returned if the field doesn't have the related feature. **Impact:** - facet-search is now relying on AttributePatterns to match the locales - search using filters is now relying on FilterableAttributesFeatures - distinct attribute is now relying on FilterableAttributesRules --- crates/filter-parser/src/condition.rs | 19 ++ crates/meilisearch-types/src/error.rs | 1 + crates/meilisearch/src/search/mod.rs | 9 +- crates/milli/src/error.rs | 2 + .../milli/src/filterable_attributes_rules.rs | 104 +++++++ crates/milli/src/index.rs | 90 ++---- crates/milli/src/search/facet/filter.rs | 263 +++++++++++++----- crates/milli/src/search/facet/search.rs | 15 +- crates/milli/src/search/mod.rs | 16 +- 9 files changed, 365 insertions(+), 154 deletions(-) diff --git a/crates/filter-parser/src/condition.rs b/crates/filter-parser/src/condition.rs index 04b6dc266..0fc007bf1 100644 --- a/crates/filter-parser/src/condition.rs +++ b/crates/filter-parser/src/condition.rs @@ -30,6 +30,25 @@ pub enum Condition<'a> { StartsWith { keyword: Token<'a>, word: Token<'a> }, } +impl Condition<'_> { + pub fn operator(&self) -> &str { + match self { + Condition::GreaterThan(_) => ">", + Condition::GreaterThanOrEqual(_) => ">=", + Condition::Equal(_) => "=", + Condition::NotEqual(_) => "!=", + Condition::Null => "IS NULL", + Condition::Empty => "IS EMPTY", + Condition::Exists => "EXISTS", + Condition::LowerThan(_) => "<", + Condition::LowerThanOrEqual(_) => "<=", + Condition::Between { .. } => "TO", + Condition::Contains { .. } => "CONTAINS", + Condition::StartsWith { .. } => "STARTS WITH", + } + } +} + /// condition = value ("==" | ">" ...) value pub fn parse_condition(input: Span) -> IResult { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index f64301b8c..7db4f9d9a 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -414,6 +414,7 @@ impl ErrorCode for milli::Error { UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::InvalidSearchFilter, UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter, + UserError::FilterOperatorNotAllowed { .. } => Code::InvalidSearchFilter, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { Code::InvalidDocumentId diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 2091047fc..a16f4eb6a 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -20,7 +20,7 @@ use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; use meilisearch_types::milli::vector::Embedder; use meilisearch_types::milli::{ - FacetValueHit, InternalError, OrderBy, SearchForFacetValues, TimeBudget, + FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, TimeBudget, }; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; @@ -1538,8 +1538,9 @@ pub fn perform_facet_search( // If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale. // If the user does not provide locales, we use the locales of the facet string. let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default(); - let localized_attributes_locales = - localized_attributes.into_iter().find(|attr| attr.match_str(&facet_name)); + let localized_attributes_locales = localized_attributes + .into_iter() + .find(|attr| attr.match_str(&facet_name) == PatternMatch::Match); let locales = localized_attributes_locales.map(|attr| { attr.locales .into_iter() @@ -1885,7 +1886,7 @@ fn format_fields( let locales = locales.or_else(|| { localized_attributes .iter() - .find(|rule| rule.match_str(key)) + .find(|rule| rule.match_str(key) == PatternMatch::Match) .map(LocalizedAttributesRule::locales) }); diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index c8ed1912f..857a812cd 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -138,6 +138,8 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidFilter(String), #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)] InvalidFilterExpression(&'static [&'static str], Value), + #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`, allowed operators: {}.", allowed_operators.join(", "))] + FilterOperatorNotAllowed { field: String, allowed_operators: Vec, operator: String }, #[error("Attribute `{}` is not sortable. {}", .field, match .valid_fields.is_empty() { diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index fe603c1c2..0b7c9092b 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -202,3 +202,107 @@ impl Default for FilterFeatures { Self { equality: true, comparison: false } } } + +pub fn filtered_matching_field_names<'fim>( + filterable_attributes: &[FilterableAttributesRule], + fields_ids_map: &'fim FieldsIdsMap, + filter: &impl Fn(&FilterableAttributesFeatures) -> bool, +) -> BTreeSet<&'fim str> { + let mut result = BTreeSet::new(); + for (_, field_name) in fields_ids_map.iter() { + for filterable_attribute in filterable_attributes { + if filterable_attribute.match_str(field_name) == PatternMatch::Match { + let features = filterable_attribute.features(); + if filter(&features) { + result.insert(field_name); + } + } + } + } + result +} + +pub fn matching_features( + field_name: &str, + filterable_attributes: &[FilterableAttributesRule], +) -> Option { + for filterable_attribute in filterable_attributes { + if filterable_attribute.match_str(field_name) == PatternMatch::Match { + return Some(filterable_attribute.features()); + } + } + None +} + +pub fn is_field_filterable( + field_name: &str, + filterable_attributes: &[FilterableAttributesRule], +) -> bool { + matching_features(field_name, filterable_attributes) + .map_or(false, |features| features.is_filterable()) +} + +pub fn is_field_facet_searchable( + field_name: &str, + filterable_attributes: &[FilterableAttributesRule], +) -> bool { + matching_features(field_name, filterable_attributes) + .map_or(false, |features| features.is_facet_searchable()) +} + +/// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields. +pub fn match_faceted_field( + field_name: &str, + filterable_fields: &[FilterableAttributesRule], + sortable_fields: &HashSet, + asc_desc_fields: &HashSet, + distinct_field: &Option, +) -> PatternMatch { + // Check if the field matches any filterable or facet searchable field + let mut selection = match_pattern_by_features(field_name, filterable_fields, &|features| { + features.is_facet_searchable() || features.is_filterable() + }); + + // If the field matches the pattern, return Match + if selection == PatternMatch::Match { + return selection; + } + + match match_distinct_field(distinct_field.as_deref(), field_name) { + PatternMatch::Match => return PatternMatch::Match, + PatternMatch::Parent => selection = PatternMatch::Parent, + PatternMatch::NoMatch => (), + } + + // Otherwise, check if the field matches any sortable/asc_desc field + for pattern in sortable_fields.iter().chain(asc_desc_fields.iter()) { + match match_field_legacy(pattern, field_name) { + PatternMatch::Match => return PatternMatch::Match, + PatternMatch::Parent => selection = PatternMatch::Parent, + PatternMatch::NoMatch => (), + } + } + + selection +} + +fn match_pattern_by_features( + field_name: &str, + filterable_attributes: &[FilterableAttributesRule], + filter: &impl Fn(&FilterableAttributesFeatures) -> bool, +) -> PatternMatch { + let mut selection = PatternMatch::NoMatch; + // Check if the field name matches any pattern that is facet searchable or filterable + for pattern in filterable_attributes { + let features = pattern.features(); + if filter(&features) { + match pattern.match_str(field_name) { + PatternMatch::Match => return PatternMatch::Match, + PatternMatch::Parent => selection = PatternMatch::Parent, + PatternMatch::NoMatch => (), + } + } + } + + selection +} diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index d40ddb15d..186d55809 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -945,83 +945,37 @@ impl Index { Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect()) } - /* faceted fields */ - - /// Writes the faceted fields in the database. - pub(crate) fn put_faceted_fields( - &self, - wtxn: &mut RwTxn<'_>, - fields: &HashSet, - ) -> heed::Result<()> { - self.main.remap_types::>().put( - wtxn, - main_key::HIDDEN_FACETED_FIELDS_KEY, - fields, - ) + /// Returns true if the geo feature is enabled. + pub fn is_geo_enabled(&self, rtxn: &RoTxn<'_>) -> Result { + let geo_filter = self.is_geo_filtering_enabled(rtxn)?; + let geo_sortable = self.is_geo_sorting_enabled(rtxn)?; + Ok(geo_filter || geo_sortable) } - /// Returns the faceted fields names. - pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result> { - Ok(self - .main - .remap_types::>() - .get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)? - .unwrap_or_default()) + /// Returns true if the geo sorting feature is enabled. + pub fn is_geo_sorting_enabled(&self, rtxn: &RoTxn<'_>) -> Result { + let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME); + Ok(geo_sortable) } - /// Identical to `faceted_fields`, but returns ids instead. - pub fn faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result> { - let fields = self.faceted_fields(rtxn)?; - let fields_ids_map = self.fields_ids_map(rtxn)?; - - let mut fields_ids = HashSet::new(); - for name in fields { - if let Some(field_id) = fields_ids_map.id(&name) { - fields_ids.insert(field_id); - } - } - - Ok(fields_ids) + /// Returns true if the geo filtering feature is enabled. + pub fn is_geo_filtering_enabled(&self, rtxn: &RoTxn<'_>) -> Result { + let geo_filter = + self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geo()); + Ok(geo_filter) } - /* faceted documents ids */ - - /// Returns the user defined faceted fields names. - /// - /// The user faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields. - pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result> { - let filterable_fields = self.filterable_fields(rtxn)?; - let sortable_fields = self.sortable_fields(rtxn)?; - let distinct_field = self.distinct_field(rtxn)?; - let asc_desc_fields = - self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion { + pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result> { + let asc_desc_fields = self + .criteria(rtxn)? + .into_iter() + .filter_map(|criterion| match criterion { Criterion::Asc(field) | Criterion::Desc(field) => Some(field), _otherwise => None, - }); + }) + .collect(); - let mut faceted_fields = filterable_fields; - faceted_fields.extend(sortable_fields); - faceted_fields.extend(asc_desc_fields); - if let Some(field) = distinct_field { - faceted_fields.insert(field.to_owned()); - } - - Ok(faceted_fields) - } - - /// Identical to `user_defined_faceted_fields`, but returns ids instead. - pub fn user_defined_faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result> { - let fields = self.user_defined_faceted_fields(rtxn)?; - let fields_ids_map = self.fields_ids_map(rtxn)?; - - let mut fields_ids = HashSet::new(); - for name in fields { - if let Some(field_id) = fields_ids_map.id(&name) { - fields_ids.insert(field_id); - } - } - - Ok(fields_ids) + Ok(asc_desc_fields) } /* faceted documents ids */ diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index 76f9ed6ff..fa3e4ea28 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::BTreeSet; use std::fmt::{Debug, Display}; use std::ops::Bound::{self, Excluded, Included}; @@ -12,13 +12,16 @@ use serde_json::Value; use super::facet_range_search; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, UserError}; +use crate::filterable_attributes_rules::{ + filtered_matching_field_names, is_field_filterable, matching_features, +}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ - distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result, - SerializationError, + distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures, + FilterableAttributesRule, Index, InternalError, Result, SerializationError, }; /// The maximum number of filters the filter AST can process. @@ -60,7 +63,7 @@ impl Display for BadGeoError { #[derive(Debug)] enum FilterError<'a> { - AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet }, + AttributeNotFilterable { attribute: &'a str, filterable_fields: BTreeSet<&'a str> }, ParseGeoError(BadGeoError), TooDeep, } @@ -230,17 +233,22 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { // to avoid doing this for each recursive call we're going to do it ONCE ahead of time - let filterable_fields = index.filterable_fields(rtxn)?; + let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; for fid in self.condition.fids(MAX_FILTER_DEPTH) { let attribute = fid.value(); - if !crate::is_faceted(attribute, &filterable_fields) { + if !is_field_filterable(attribute, &filterable_attributes_rules) { + let fields_ids_map = index.fields_ids_map(rtxn)?; return Err(fid.as_external_error(FilterError::AttributeNotFilterable { attribute, - filterable_fields, + filterable_fields: filtered_matching_field_names( + &filterable_attributes_rules, + &fields_ids_map, + &|features| features.is_filterable(), + ), }))?; } } - self.inner_evaluate(rtxn, index, &filterable_fields, None) + self.inner_evaluate(rtxn, index, &filterable_attributes_rules, None) } fn evaluate_operator( @@ -249,6 +257,7 @@ impl<'a> Filter<'a> { field_id: FieldId, universe: Option<&RoaringBitmap>, operator: &Condition<'a>, + features: &FilterableAttributesFeatures, ) -> Result { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; @@ -258,6 +267,28 @@ impl<'a> Filter<'a> { // field id and the level. let (left, right) = match operator { + // return an error if the filter is not allowed for this field + Condition::GreaterThan(_) + | Condition::GreaterThanOrEqual(_) + | Condition::LowerThan(_) + | Condition::LowerThanOrEqual(_) + | Condition::Between { .. } + if !features.is_filterable_comparison() => + { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } + Condition::Empty if !features.is_filterable_empty() => { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } + Condition::Null if !features.is_filterable_null() => { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } + Condition::Exists if !features.is_filterable_exists() => { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } + Condition::Equal(_) | Condition::NotEqual(_) if !features.is_filterable_equality() => { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } Condition::GreaterThan(val) => { (Excluded(val.parse_finite_float()?), Included(f64::MAX)) } @@ -307,7 +338,8 @@ impl<'a> Filter<'a> { } Condition::NotEqual(val) => { let operator = Condition::Equal(val.clone()); - let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?; + let docids = + Self::evaluate_operator(rtxn, index, field_id, None, &operator, features)?; let all_ids = index.documents_ids(rtxn)?; return Ok(all_ids - docids); } @@ -409,7 +441,7 @@ impl<'a> Filter<'a> { &self, rtxn: &heed::RoTxn<'_>, index: &Index, - filterable_fields: &HashSet, + filterable_fields: &[FilterableAttributesRule], universe: Option<&RoaringBitmap>, ) -> Result { if universe.map_or(false, |u| u.is_empty()) { @@ -434,36 +466,56 @@ impl<'a> Filter<'a> { } } FilterCondition::In { fid, els } => { - if crate::is_faceted(fid.value(), filterable_fields) { - let field_ids_map = index.fields_ids_map(rtxn)?; - if let Some(fid) = field_ids_map.id(fid.value()) { - els.iter() - .map(|el| Condition::Equal(el.clone())) - .map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op)) - .union() - } else { - Ok(RoaringBitmap::new()) + match matching_features(fid.value(), filterable_fields) { + Some(features) if features.is_filterable() => { + let field_ids_map = index.fields_ids_map(rtxn)?; + if let Some(fid) = field_ids_map.id(fid.value()) { + els.iter() + .map(|el| Condition::Equal(el.clone())) + .map(|op| { + Self::evaluate_operator( + rtxn, index, fid, universe, &op, &features, + ) + }) + .union() + } else { + Ok(RoaringBitmap::new()) + } + } + _ => { + let field_ids_map = index.fields_ids_map(rtxn)?; + Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields: filtered_matching_field_names( + filterable_fields, + &field_ids_map, + &|features| features.is_filterable(), + ), + }))? } - } else { - Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filterable_fields.clone(), - }))? } } FilterCondition::Condition { fid, op } => { - if crate::is_faceted(fid.value(), filterable_fields) { - let field_ids_map = index.fields_ids_map(rtxn)?; - if let Some(fid) = field_ids_map.id(fid.value()) { - Self::evaluate_operator(rtxn, index, fid, universe, op) - } else { - Ok(RoaringBitmap::new()) + match matching_features(fid.value(), filterable_fields) { + Some(features) if features.is_filterable() => { + let field_ids_map = index.fields_ids_map(rtxn)?; + if let Some(fid) = field_ids_map.id(fid.value()) { + Self::evaluate_operator(rtxn, index, fid, universe, op, &features) + } else { + Ok(RoaringBitmap::new()) + } + } + _ => { + let field_ids_map = index.fields_ids_map(rtxn)?; + Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields: filtered_matching_field_names( + filterable_fields, + &field_ids_map, + &|features| features.is_filterable(), + ), + }))? } - } else { - Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filterable_fields.clone(), - }))? } } FilterCondition::Or(subfilters) => subfilters @@ -502,7 +554,7 @@ impl<'a> Filter<'a> { } } FilterCondition::GeoLowerThan { point, radius } => { - if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) { + if index.is_geo_filtering_enabled(rtxn)? { let base_point: [f64; 2] = [point[0].parse_finite_float()?, point[1].parse_finite_float()?]; if !(-90.0..=90.0).contains(&base_point[0]) { @@ -530,14 +582,19 @@ impl<'a> Filter<'a> { Ok(result) } else { + let field_ids_map = index.fields_ids_map(rtxn)?; Err(point[0].as_external_error(FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, - filterable_fields: filterable_fields.clone(), + filterable_fields: filtered_matching_field_names( + filterable_fields, + &field_ids_map, + &|features| features.is_filterable(), + ), }))? } } FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => { - if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) { + if index.is_geo_filtering_enabled(rtxn)? { let top_right: [f64; 2] = [ top_right_point[0].parse_finite_float()?, top_right_point[1].parse_finite_float()?, @@ -662,10 +719,15 @@ impl<'a> Filter<'a> { Ok(selected_lat & selected_lng) } else { + let field_ids_map = index.fields_ids_map(rtxn)?; Err(top_right_point[0].as_external_error( FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, - filterable_fields: filterable_fields.clone(), + filterable_fields: filtered_matching_field_names( + filterable_fields, + &field_ids_map, + &|features| features.is_filterable(), + ), }, ))? } @@ -674,6 +736,26 @@ impl<'a> Filter<'a> { } } +fn generate_filter_error( + rtxn: &heed::RoTxn<'_>, + index: &Index, + field_id: FieldId, + operator: &Condition<'_>, + features: &FilterableAttributesFeatures, +) -> Error { + match index.fields_ids_map(rtxn) { + Ok(fields_ids_map) => { + let field = fields_ids_map.name(field_id).unwrap_or_default(); + Error::UserError(UserError::FilterOperatorNotAllowed { + field: field.to_string(), + allowed_operators: features.allowed_filter_operators(), + operator: operator.operator().to_string(), + }) + } + Err(e) => e.into(), + } +} + impl<'a> From> for Filter<'a> { fn from(fc: FilterCondition<'a>) -> Self { Self { condition: fc } @@ -687,12 +769,12 @@ mod tests { use big_s::S; use either::Either; - use maplit::hashset; + use meili_snap::snapshot; use roaring::RoaringBitmap; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::index::tests::TempIndex; - use crate::Filter; + use crate::{Filter, FilterableAttributesRule}; #[test] fn empty_db() { @@ -700,7 +782,9 @@ mod tests { //Set the filterable fields to be the channel. index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("PrIcE") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "PrIcE".to_string(), + )]); }) .unwrap(); @@ -784,27 +868,32 @@ mod tests { let rtxn = index.read_txn().unwrap(); let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." - )); + snapshot!(error.to_string(), @r###" + Attribute `_geo` is not filterable. This index does not have configured filterable attributes. + 12:14 _geoRadius(42, 150, 10) + "###); let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." - )); + snapshot!(error.to_string(), @r###" + Attribute `_geo` is not filterable. This index does not have configured filterable attributes. + 18:20 _geoBoundingBox([42, 150], [30, 10]) + "###); let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `dog` is not filterable. This index does not have configured filterable attributes." - )); + snapshot!(error.to_string(), @r###" + Attribute `dog` is not filterable. This index does not have configured filterable attributes. + 1:4 dog = "bernese mountain" + "###); drop(rtxn); index .update_settings(|settings| { settings.set_searchable_fields(vec![S("title")]); - settings.set_filterable_fields(hashset! { S("title") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "title".to_string(), + )]); }) .unwrap(); @@ -812,39 +901,45 @@ mod tests { let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `_geo` is not filterable. This index does not have configured filterable attributes. + 12:16 _geoRadius(-100, 150, 10) + "###); let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `_geo` is not filterable. This index does not have configured filterable attributes. + 18:20 _geoBoundingBox([42, 150], [30, 10]) + "###); let filter = Filter::from_str("name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `name` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `name` is not filterable. This index does not have configured filterable attributes. + 1:5 name = 12 + "###); let filter = Filter::from_str("title = \"test\" AND name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `name` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `name` is not filterable. This index does not have configured filterable attributes. + 20:24 title = "test" AND name = 12 + "###); let filter = Filter::from_str("title = \"test\" AND name IN [12]").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `name` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `name` is not filterable. This index does not have configured filterable attributes. + 20:24 title = "test" AND name IN [12] + "###); let filter = Filter::from_str("title = \"test\" AND name != 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `name` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `name` is not filterable. This index does not have configured filterable attributes. + 20:24 title = "test" AND name != 12 + "###); } #[test] @@ -870,7 +965,9 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset!(S("monitor_diagonal"))); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "monitor_diagonal".to_string(), + )]); }) .unwrap(); @@ -901,7 +998,9 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S( + RESERVED_GEO_FIELD_NAME, + ))]); }) .unwrap(); @@ -948,7 +1047,10 @@ mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order - settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)), + FilterableAttributesRule::Field("price".to_string()), + ]); }) .unwrap(); @@ -998,7 +1100,10 @@ mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order - settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)), + FilterableAttributesRule::Field("price".to_string()), + ]); }) .unwrap(); @@ -1108,7 +1213,9 @@ mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S("price")]); // to keep the fields order - settings.set_filterable_fields(hashset! { S("price") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "price".to_string(), + )]); }) .unwrap(); index @@ -1164,7 +1271,11 @@ mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_owned()); - settings.set_filterable_fields(hashset! { S("id"), S("one"), S("two") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field("id".to_string()), + FilterableAttributesRule::Field("one".to_string()), + FilterableAttributesRule::Field("two".to_string()), + ]); }) .unwrap(); diff --git a/crates/milli/src/search/facet/search.rs b/crates/milli/src/search/facet/search.rs index cdba7ee16..a11e5cd49 100644 --- a/crates/milli/src/search/facet/search.rs +++ b/crates/milli/src/search/facet/search.rs @@ -10,6 +10,9 @@ use roaring::RoaringBitmap; use tracing::error; use crate::error::UserError; +use crate::filterable_attributes_rules::{ + filtered_matching_field_names, is_field_facet_searchable, +}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::search::build_dfa; use crate::{DocumentId, FieldId, OrderBy, Result, Search}; @@ -73,10 +76,16 @@ impl<'a> SearchForFacetValues<'a> { let index = self.search_query.index; let rtxn = self.search_query.rtxn; - let filterable_fields = index.filterable_fields(rtxn)?; - if !filterable_fields.contains(&self.facet) { + let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; + if !is_field_facet_searchable(&self.facet, &filterable_attributes_rules) { + let fields_ids_map = index.fields_ids_map(rtxn)?; + let matching_field_names = filtered_matching_field_names( + &filterable_attributes_rules, + &fields_ids_map, + &|features| features.is_facet_searchable(), + ); let (valid_fields, hidden_fields) = - index.remove_hidden_fields(rtxn, filterable_fields)?; + index.remove_hidden_fields(rtxn, matching_field_names)?; return Err(UserError::InvalidFacetSearchFacetName { field: self.facet.clone(), diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index d5b05f515..15f3b1b4a 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::{execute_vector_search, PartialSearchResult}; +use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::Embedder; use crate::{ @@ -187,10 +188,19 @@ impl<'a> Search<'a> { } if let Some(distinct) = &self.distinct { - let filterable_fields = ctx.index.filterable_fields(ctx.txn)?; - if !crate::is_faceted(distinct, &filterable_fields) { + let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?; + // check if the distinct field is in the filterable fields + if !is_field_filterable(distinct, &filterable_fields) { + // if not, remove the hidden fields from the filterable fields to generate the error message + let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; + let matching_field_names = filtered_matching_field_names( + &filterable_fields, + &fields_ids_map, + &|features| features.is_filterable(), + ); let (valid_fields, hidden_fields) = - ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?; + ctx.index.remove_hidden_fields(ctx.txn, matching_field_names)?; + // and return the error return Err(Error::UserError(UserError::InvalidDistinctAttribute { field: distinct.clone(), valid_fields,