From b88aa9cc76b6af7b3b5f94072b6b63e427fc7b02 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Mar 2025 18:22:12 +0100 Subject: [PATCH] Rely on FieldIdMapWithMetadata in facet search and filters --- crates/milli/src/error.rs | 4 +- crates/milli/src/fields_ids_map/metadata.rs | 27 ++++- .../milli/src/filterable_attributes_rules.rs | 14 +-- crates/milli/src/index.rs | 12 +- crates/milli/src/search/facet/filter.rs | 113 ++++++++---------- crates/milli/src/search/facet/search.rs | 49 ++++---- crates/milli/src/search/mod.rs | 2 +- .../milli/src/update/index_documents/mod.rs | 12 +- 8 files changed, 128 insertions(+), 105 deletions(-) diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index b34c2bd9a..3121f5405 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -138,12 +138,12 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidFilter(String), #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)] InvalidFilterExpression(&'static [&'static str], Value), - #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` matched rule #{rule_index} in `filterableAttributes`", allowed_operators.join(", "))] + #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` {} in `filterableAttributes`", allowed_operators.join(", "), rule_index.map_or("did not match any rule".to_string(), |rule_index| format!("matched rule #{rule_index}")))] FilterOperatorNotAllowed { field: String, allowed_operators: Vec, operator: String, - rule_index: usize, + rule_index: Option, }, #[error("Attribute `{}` is not sortable. {}", .field, diff --git a/crates/milli/src/fields_ids_map/metadata.rs b/crates/milli/src/fields_ids_map/metadata.rs index fd333c3c6..5636256eb 100644 --- a/crates/milli/src/fields_ids_map/metadata.rs +++ b/crates/milli/src/fields_ids_map/metadata.rs @@ -126,20 +126,35 @@ impl Metadata { &self, rules: &'rules [FilterableAttributesRule], ) -> Option<&'rules FilterableAttributesRule> { + self.filterable_attributes_with_rule_index(rules).map(|(_, rule)| rule) + } + + pub fn filterable_attributes_with_rule_index<'rules>( + &self, + rules: &'rules [FilterableAttributesRule], + ) -> Option<(usize, &'rules FilterableAttributesRule)> { let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get(); - // - 1: `filterable_attributes_rule_id` is NonZero - let rule = rules.get((filterable_attributes_rule_id - 1) as usize).unwrap(); - Some(rule) + let rule_id = (filterable_attributes_rule_id - 1) as usize; + let rule = rules.get(rule_id).unwrap(); + Some((rule_id, rule)) } pub fn filterable_attributes_features( &self, rules: &[FilterableAttributesRule], ) -> FilterableAttributesFeatures { - self.filterable_attributes(rules) - .map(|rule| rule.features()) + let (_, features) = self.filterable_attributes_features_with_rule_index(rules); + features + } + + pub fn filterable_attributes_features_with_rule_index( + &self, + rules: &[FilterableAttributesRule], + ) -> (Option, FilterableAttributesFeatures) { + self.filterable_attributes_with_rule_index(rules) + .map(|(rule_index, rule)| (Some(rule_index), rule.features())) // if there is no filterable attributes rule, return no features - .unwrap_or_else(FilterableAttributesFeatures::no_features) + .unwrap_or_else(|| (None, FilterableAttributesFeatures::no_features())) } pub fn is_sortable(&self) -> bool { diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index dbc6d72af..50f9b8e9d 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -6,7 +6,8 @@ use utoipa::ToSchema; use crate::{ attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch}, constants::RESERVED_GEO_FIELD_NAME, - AttributePatterns, FieldsIdsMap, + fields_ids_map::metadata::FieldIdMapWithMetadata, + AttributePatterns, }; #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)] @@ -235,15 +236,14 @@ impl Default for FilterFeatures { /// * `filter` - The filter function to apply to the filterable attributes rules. pub fn filtered_matching_field_names<'fim>( filterable_attributes: &[FilterableAttributesRule], - fields_ids_map: &'fim FieldsIdsMap, + fields_ids_map: &'fim FieldIdMapWithMetadata, filter: &impl Fn(FilterableAttributesFeatures) -> bool, ) -> BTreeSet<&'fim str> { let mut result = BTreeSet::new(); - for (_, field_name) in fields_ids_map.iter() { - if let Some((_, features)) = matching_features(field_name, filterable_attributes) { - if filter(features) { - result.insert(field_name); - } + for (_, field_name, metadata) in fields_ids_map.iter() { + let features = metadata.filterable_attributes_features(filterable_attributes); + if filter(features) { + result.insert(field_name); } } result diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 5bc434517..f9109a137 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -13,7 +13,7 @@ use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAM use crate::database_stats::DatabaseStats; use crate::documents::PrimaryKey; use crate::error::{InternalError, UserError}; -use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; +use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::fields_ids_map::FieldsIdsMap; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, @@ -514,6 +514,16 @@ impl Index { .unwrap_or_default()) } + /// Returns the fields ids map with metadata. + /// + /// This structure is not yet stored in the index, and is generated on the fly. + pub fn fields_ids_map_with_metadata(&self, rtxn: &RoTxn<'_>) -> Result { + Ok(FieldIdMapWithMetadata::new( + self.fields_ids_map(rtxn)?, + MetadataBuilder::from_index(self, rtxn)?, + )) + } + /* fieldids weights map */ // This maps the fields ids to their weights. // Their weights is defined by the ordering of the searchable attributes. diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index bc7209ef9..b8c9cddfc 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -12,17 +12,15 @@ use serde_json::Value; use super::facet_range_search; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, UserError}; -use crate::filterable_attributes_rules::{ - filtered_matching_field_names, is_field_filterable, matching_features, -}; +use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; +use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ - distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap, - FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result, - SerializationError, + distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures, + FilterableAttributesRule, Index, InternalError, Result, SerializationError, }; /// The maximum number of filters the filter AST can process. @@ -234,21 +232,32 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { // to avoid doing this for each recursive call we're going to do it ONCE ahead of time - let fields_ids_map = index.fields_ids_map(rtxn)?; + let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; for fid in self.condition.fids(MAX_FILTER_DEPTH) { let attribute = fid.value(); - if !is_field_filterable(attribute, &filterable_attributes_rules) { - return Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute, - filterable_fields: filtered_matching_field_names( - &filterable_attributes_rules, - &fields_ids_map, - &|features| features.is_filterable(), - ), - }))?; + if let Some((_, metadata)) = fields_ids_map.id_with_metadata(fid.value()) { + if metadata + .filterable_attributes_features(&filterable_attributes_rules) + .is_filterable() + { + continue; + } + } else if is_field_filterable(attribute, &filterable_attributes_rules) { + continue; } + + // If the field is not filterable, return an error + return Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute, + filterable_fields: filtered_matching_field_names( + &filterable_attributes_rules, + &fields_ids_map, + &|features| features.is_filterable(), + ), + }))?; } + self.inner_evaluate(rtxn, index, &fields_ids_map, &filterable_attributes_rules, None) } @@ -259,7 +268,7 @@ impl<'a> Filter<'a> { universe: Option<&RoaringBitmap>, operator: &Condition<'a>, features: &FilterableAttributesFeatures, - rule_index: usize, + rule_index: Option, ) -> Result { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; @@ -454,7 +463,7 @@ impl<'a> Filter<'a> { &self, rtxn: &heed::RoTxn<'_>, index: &Index, - field_ids_map: &FieldsIdsMap, + field_ids_map: &FieldIdMapWithMetadata, filterable_attribute_rules: &[FilterableAttributesRule], universe: Option<&RoaringBitmap>, ) -> Result { @@ -480,51 +489,33 @@ impl<'a> Filter<'a> { } } } - FilterCondition::In { fid, els } => { - match matching_features(fid.value(), filterable_attribute_rules) { - Some((rule_index, features)) if features.is_filterable() => { - if let Some(fid) = field_ids_map.id(fid.value()) { - els.iter() - .map(|el| Condition::Equal(el.clone())) - .map(|op| { - Self::evaluate_operator( - rtxn, index, fid, universe, &op, &features, rule_index, - ) - }) - .union() - } else { - Ok(RoaringBitmap::new()) - } - } - _ => Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filtered_matching_field_names( - filterable_attribute_rules, - &field_ids_map, - &|features| features.is_filterable(), - ), - }))?, - } - } - FilterCondition::Condition { fid, op } => { - match matching_features(fid.value(), filterable_attribute_rules) { - Some((rule_index, features)) if features.is_filterable() => { - if let Some(fid) = field_ids_map.id(fid.value()) { + FilterCondition::In { fid, els } => match field_ids_map.id_with_metadata(fid.value()) { + Some((fid, metadata)) => { + let (rule_index, features) = metadata + .filterable_attributes_features_with_rule_index(filterable_attribute_rules); + els.iter() + .map(|el| Condition::Equal(el.clone())) + .map(|op| { Self::evaluate_operator( - rtxn, index, fid, universe, op, &features, rule_index, + rtxn, index, fid, universe, &op, &features, rule_index, ) - } else { - Ok(RoaringBitmap::new()) - } + }) + .union() + } + None => Ok(RoaringBitmap::new()), + }, + FilterCondition::Condition { fid, op } => { + match field_ids_map.id_with_metadata(fid.value()) { + Some((fid, metadata)) => { + let (rule_index, features) = metadata + .filterable_attributes_features_with_rule_index( + filterable_attribute_rules, + ); + Self::evaluate_operator( + rtxn, index, fid, universe, op, &features, rule_index, + ) } - _ => Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filtered_matching_field_names( - filterable_attribute_rules, - &field_ids_map, - &|features| features.is_filterable(), - ), - }))?, + None => Ok(RoaringBitmap::new()), } } FilterCondition::Or(subfilters) => subfilters @@ -764,7 +755,7 @@ fn generate_filter_error( field_id: FieldId, operator: &Condition<'_>, features: &FilterableAttributesFeatures, - rule_index: usize, + rule_index: Option, ) -> Error { match index.fields_ids_map(rtxn) { Ok(fields_ids_map) => { diff --git a/crates/milli/src/search/facet/search.rs b/crates/milli/src/search/facet/search.rs index a11e5cd49..da1e1610b 100644 --- a/crates/milli/src/search/facet/search.rs +++ b/crates/milli/src/search/facet/search.rs @@ -77,30 +77,37 @@ impl<'a> SearchForFacetValues<'a> { let rtxn = self.search_query.rtxn; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; - if !is_field_facet_searchable(&self.facet, &filterable_attributes_rules) { - let fields_ids_map = index.fields_ids_map(rtxn)?; - let matching_field_names = filtered_matching_field_names( - &filterable_attributes_rules, - &fields_ids_map, - &|features| features.is_facet_searchable(), - ); - let (valid_fields, hidden_fields) = - index.remove_hidden_fields(rtxn, matching_field_names)?; - - return Err(UserError::InvalidFacetSearchFacetName { - field: self.facet.clone(), - valid_fields, - hidden_fields, + let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?; + let fid = match fields_ids_map.id_with_metadata(&self.facet) { + Some((fid, metadata)) + if metadata + .filterable_attributes_features(&filterable_attributes_rules) + .is_facet_searchable() => + { + fid } - .into()); - } - - let fields_ids_map = index.fields_ids_map(rtxn)?; - let fid = match fields_ids_map.id(&self.facet) { - Some(fid) => fid, // we return an empty list of results when the attribute has been // set as filterable but no document contains this field (yet). - None => return Ok(Vec::new()), + None if is_field_facet_searchable(&self.facet, &filterable_attributes_rules) => { + return Ok(Vec::new()); + } + // we return an error when the attribute is not facet searchable + _otherwise => { + let matching_field_names = filtered_matching_field_names( + &filterable_attributes_rules, + &fields_ids_map, + &|features| features.is_facet_searchable(), + ); + let (valid_fields, hidden_fields) = + index.remove_hidden_fields(rtxn, matching_field_names)?; + + return Err(UserError::InvalidFacetSearchFacetName { + field: self.facet.clone(), + valid_fields, + hidden_fields, + } + .into()); + } }; let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index 15f3b1b4a..7d98f3453 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -192,7 +192,7 @@ impl<'a> Search<'a> { // check if the distinct field is in the filterable fields if !is_field_filterable(distinct, &filterable_fields) { // if not, remove the hidden fields from the filterable fields to generate the error message - let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; + let fields_ids_map = ctx.index.fields_ids_map_with_metadata(ctx.txn)?; let matching_field_names = filtered_matching_field_names( &filterable_fields, &fields_ids_map, diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 19ab1ff34..5ec8b1c7c 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -1256,7 +1256,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() @@ -1479,7 +1479,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() @@ -1507,7 +1507,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() @@ -1745,7 +1745,7 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() @@ -1856,7 +1856,7 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() @@ -1925,7 +1925,7 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable()