From 0200c65ebf24924c6077e52b994c51f8cf1691fd Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 10:22:02 +0100 Subject: [PATCH] Change the filterableAttributes setting API **Changes:** The filterableAttributes type has been changed from a `BTreeSet<String>` to a `Vec<FilterableAttributesRule>`, which is a list of rules defining patterns to match the documents' fields and a set of features to apply on the matching fields. The rule order given by the user is now important information: the features applied on a filterable field will be chosen based on the rule order, as we do for the LocalizedAttributesRules. This means that the list will no longer be reordered and will keep the user-defined order; moreover, if there are any duplicates, they will no longer be de-duplicated. **Impact:** - Settings API - the database format of the filterable attributes changed - may impact the LocalizedAttributesRules due to the AttributePatterns factorization - OpenAPI generator --- crates/meilisearch-types/src/locales.rs | 4 +- crates/meilisearch-types/src/settings.rs | 8 +- .../src/routes/indexes/settings.rs | 2 +- .../src/routes/indexes/settings_analytics.rs | 15 +- crates/meilisearch/src/routes/mod.rs | 6 +- crates/milli/src/attribute_patterns.rs | 128 +++++++++++ .../milli/src/filterable_attributes_rules.rs | 204 ++++++++++++++++++ crates/milli/src/index.rs | 35 ++- crates/milli/src/lib.rs | 9 +- .../milli/src/localized_attributes_rules.rs | 54 +---- 10 files changed, 386 insertions(+), 79 deletions(-) create mode 100644 crates/milli/src/attribute_patterns.rs create mode 100644 crates/milli/src/filterable_attributes_rules.rs diff --git a/crates/meilisearch-types/src/locales.rs b/crates/meilisearch-types/src/locales.rs index 945c38cc3..b3fb90493 100644 --- a/crates/meilisearch-types/src/locales.rs +++ b/crates/meilisearch-types/src/locales.rs @@ -1,5 +1,5 @@ use deserr::Deserr; -use milli::LocalizedAttributesRule; +use milli::{AttributePatterns, LocalizedAttributesRule}; use serde::{Deserialize, 
Serialize}; use utoipa::ToSchema; @@ -7,7 +7,7 @@ use utoipa::ToSchema; #[deserr(rename_all = camelCase)] #[serde(rename_all = "camelCase")] pub struct LocalizedAttributesRuleView { - pub attribute_patterns: Vec, + pub attribute_patterns: AttributePatterns, pub locales: Vec, } diff --git a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index e501d7359..7b5807d06 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -11,7 +11,7 @@ use fst::IntoStreamer; use milli::index::{IndexEmbeddingConfig, PrefixSearch}; use milli::proximity::ProximityPrecision; use milli::update::Setting; -use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET}; +use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET}; use serde::{Deserialize, Serialize, Serializer}; use utoipa::ToSchema; @@ -202,8 +202,8 @@ pub struct Settings { /// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters). #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] - #[schema(value_type = Option>, example = json!(["release_date", "genre"]))] - pub filterable_attributes: Setting>, + #[schema(value_type = Option>, example = json!(["release_date", "genre"]))] + pub filterable_attributes: Setting>, /// Attributes to use when sorting search results. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] @@ -791,7 +791,7 @@ pub fn settings( .user_defined_searchable_fields(rtxn)? 
.map(|fields| fields.into_iter().map(String::from).collect()); - let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect(); + let filterable_attributes = index.filterable_attributes_rules(rtxn)?.into_iter().collect(); let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect(); diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index ad76b3f42..6ecc77ec3 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -291,7 +291,7 @@ make_setting_routes!( { route: "/filterable-attributes", update_verb: put, - value_type: std::collections::BTreeSet, + value_type: Vec, err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes, >, diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs index ffeadcab6..627f9103e 100644 --- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -8,6 +8,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; use meilisearch_types::milli::update::Setting; +use meilisearch_types::milli::FilterableAttributesRule; use meilisearch_types::settings::{ FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView, RankingRuleView, SettingEmbeddingSettings, TypoSettings, @@ -89,6 +90,10 @@ impl Aggregate for SettingsAnalytics { filterable_attributes: FilterableAttributesAnalytics { total: new.filterable_attributes.total.or(self.filterable_attributes.total), has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo), + has_patterns: new + .filterable_attributes + .has_patterns + 
.or(self.filterable_attributes.has_patterns), }, distinct_attribute: DistinctAttributeAnalytics { set: self.distinct_attribute.set | new.distinct_attribute.set, @@ -328,13 +333,19 @@ impl SortableAttributesAnalytics { pub struct FilterableAttributesAnalytics { pub total: Option, pub has_geo: Option, + pub has_patterns: Option, } impl FilterableAttributesAnalytics { - pub fn new(setting: Option<&BTreeSet>) -> Self { + pub fn new(setting: Option<&Vec>) -> Self { Self { total: setting.as_ref().map(|filter| filter.len()), - has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), + has_geo: setting + .as_ref() + .map(|filter| filter.iter().any(FilterableAttributesRule::has_geo)), + has_patterns: setting.as_ref().map(|filter| { + filter.iter().any(|rule| matches!(rule, FilterableAttributesRule::Pattern(_))) + }), } } diff --git a/crates/meilisearch/src/routes/mod.rs b/crates/meilisearch/src/routes/mod.rs index 02cb4130a..cc9aeb7d2 100644 --- a/crates/meilisearch/src/routes/mod.rs +++ b/crates/meilisearch/src/routes/mod.rs @@ -9,6 +9,10 @@ use meilisearch_types::batches::BatchStats; use meilisearch_types::error::{Code, ErrorType, ResponseError}; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::keys::CreateApiKey; +use meilisearch_types::milli::{ + AttributePatterns, FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns, + FilterableAttributesRule, +}; use meilisearch_types::settings::{ Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings, Unchecked, @@ -88,7 +92,7 @@ pub mod tasks; url = "/", description = "Local server", )), - components(schemas(PaginationView, PaginationView, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, 
SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings, Settings, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote)) + components(schemas(PaginationView, PaginationView, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings, Settings, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures)) )] pub struct MeilisearchApi; diff --git a/crates/milli/src/attribute_patterns.rs b/crates/milli/src/attribute_patterns.rs new file mode 100644 index 000000000..baf239c3f --- /dev/null +++ b/crates/milli/src/attribute_patterns.rs @@ -0,0 +1,128 @@ +use deserr::Deserr; +use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; + +use crate::is_faceted_by; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[repr(transparent)] +#[serde(transparent)] +pub struct AttributePatterns { + #[schema(value_type = Vec)] + pub patterns: Vec, +} + 
+impl Deserr for AttributePatterns { + fn deserialize_from_value( + value: deserr::Value, + location: deserr::ValuePointerRef, + ) -> Result { + Vec::::deserialize_from_value(value, location).map(|patterns| Self { patterns }) + } +} + +impl From> for AttributePatterns { + fn from(patterns: Vec) -> Self { + Self { patterns } + } +} + +impl AttributePatterns { + pub fn match_str(&self, str: &str) -> PatternMatch { + let mut pattern_match = PatternMatch::NoMatch; + for pattern in &self.patterns { + match match_pattern(pattern, str) { + PatternMatch::Match => return PatternMatch::Match, + PatternMatch::Parent => pattern_match = PatternMatch::Parent, + PatternMatch::NoMatch => {} + } + } + pattern_match + } +} + +fn match_pattern(pattern: &str, str: &str) -> PatternMatch { + if pattern == "*" { + return PatternMatch::Match; + } else if pattern.starts_with('*') && pattern.ends_with('*') { + if str.contains(&pattern[1..pattern.len() - 1]) { + return PatternMatch::Match; + } + } else if let Some(pattern) = pattern.strip_prefix('*') { + if str.ends_with(pattern) { + return PatternMatch::Match; + } + } else if let Some(pattern) = pattern.strip_suffix('*') { + if str.starts_with(pattern) { + return PatternMatch::Match; + } + } else if pattern == str { + return PatternMatch::Match; + } + + // If the field is a parent field of the pattern, return Parent + if is_faceted_by(pattern, str) { + PatternMatch::Parent + } else { + PatternMatch::NoMatch + } +} + +pub fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch { + if is_faceted_by(field, pattern) { + // If the field matches the pattern or is a nested field of the pattern, return Match (legacy behavior) + PatternMatch::Match + } else if is_faceted_by(pattern, field) { + // If the field is a parent field of the pattern, return Parent + PatternMatch::Parent + } else { + // If the field does not match the pattern and is not a parent of a nested field that matches the pattern, return NoMatch + PatternMatch::NoMatch + } 
+} + +/// Match a field against a distinct field. +pub fn match_distinct_field(distinct_field: Option<&str>, field: &str) -> PatternMatch { + if let Some(distinct_field) = distinct_field { + if field == distinct_field { + // If the field matches exactly the distinct field, return Match + return PatternMatch::Match; + } else if is_faceted_by(distinct_field, field) { + // If the field is a parent field of the distinct field, return Parent + return PatternMatch::Parent; + } + } + // If the field does not match the distinct field and is not a parent of a nested field that matches the distinct field, return NoMatch + PatternMatch::NoMatch +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PatternMatch { + /// The field is a parent of a nested field that matches the pattern + Parent, + /// The field matches the pattern + Match, + /// The field does not match the pattern + NoMatch, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_match_pattern() { + assert_eq!(match_pattern("*", "test"), PatternMatch::Match); + assert_eq!(match_pattern("test*", "test"), PatternMatch::Match); + assert_eq!(match_pattern("test*", "testa"), PatternMatch::Match); + assert_eq!(match_pattern("*test", "test"), PatternMatch::Match); + assert_eq!(match_pattern("*test", "atest"), PatternMatch::Match); + assert_eq!(match_pattern("*test*", "test"), PatternMatch::Match); + assert_eq!(match_pattern("*test*", "atesta"), PatternMatch::Match); + assert_eq!(match_pattern("*test*", "atest"), PatternMatch::Match); + assert_eq!(match_pattern("*test*", "testa"), PatternMatch::Match); + assert_eq!(match_pattern("test*test", "test"), PatternMatch::NoMatch); + assert_eq!(match_pattern("*test", "testa"), PatternMatch::NoMatch); + assert_eq!(match_pattern("test*", "atest"), PatternMatch::NoMatch); + } +} diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs new file mode 100644 index 000000000..fe603c1c2 --- /dev/null +++ 
b/crates/milli/src/filterable_attributes_rules.rs @@ -0,0 +1,204 @@ +use deserr::{DeserializeError, Deserr, ValuePointerRef}; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeSet, HashSet}; +use utoipa::ToSchema; + +use crate::{ + attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch}, + constants::RESERVED_GEO_FIELD_NAME, + AttributePatterns, FieldsIdsMap, +}; + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)] +#[serde(untagged)] +pub enum FilterableAttributesRule { + Field(String), + Pattern(FilterableAttributesPatterns), +} + +impl FilterableAttributesRule { + pub fn match_str(&self, field: &str) -> PatternMatch { + match self { + FilterableAttributesRule::Field(pattern) => match_field_legacy(pattern, field), + FilterableAttributesRule::Pattern(patterns) => patterns.match_str(field), + } + } + + pub fn has_geo(&self) -> bool { + matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME) + } + + pub fn features(&self) -> FilterableAttributesFeatures { + match self { + FilterableAttributesRule::Field(_) => FilterableAttributesFeatures::legacy_default(), + FilterableAttributesRule::Pattern(patterns) => patterns.features(), + } + } +} + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +#[deserr(rename_all = camelCase, deny_unknown_fields)] +pub struct FilterableAttributesPatterns { + pub patterns: AttributePatterns, + #[serde(default)] + #[deserr(default)] + pub features: FilterableAttributesFeatures, +} + +impl FilterableAttributesPatterns { + pub fn match_str(&self, field: &str) -> PatternMatch { + self.patterns.match_str(field) + } + + pub fn features(&self) -> FilterableAttributesFeatures { + self.features.clone() + } +} + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] 
+#[deserr(rename_all = camelCase, deny_unknown_fields)] +#[derive(Default)] +pub struct FilterableAttributesFeatures { + facet_search: bool, + filter: FilterFeatures, +} + +impl FilterableAttributesFeatures { + pub fn legacy_default() -> Self { + Self { facet_search: true, filter: FilterFeatures::legacy_default() } + } + + pub fn no_features() -> Self { + Self { facet_search: false, filter: FilterFeatures::no_features() } + } + + pub fn is_filterable(&self) -> bool { + self.filter.is_filterable() + } + + /// Check if `IS EMPTY` is allowed + pub fn is_filterable_empty(&self) -> bool { + self.filter.is_filterable_empty() + } + + /// Check if `=`, `!=` and `IN` are allowed + pub fn is_filterable_equality(&self) -> bool { + self.filter.is_filterable_equality() + } + + /// Check if `IS NULL` is allowed + pub fn is_filterable_null(&self) -> bool { + self.filter.is_filterable_null() + } + + /// Check if `EXISTS` is allowed + pub fn is_filterable_exists(&self) -> bool { + self.filter.is_filterable_exists() + } + + /// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed + pub fn is_filterable_comparison(&self) -> bool { + self.filter.is_filterable_comparison() + } + + /// Check if the facet search is allowed + pub fn is_facet_searchable(&self) -> bool { + self.facet_search + } + + pub fn allowed_filter_operators(&self) -> Vec { + self.filter.allowed_operators() + } +} + +impl Deserr for FilterableAttributesRule { + fn deserialize_from_value( + value: deserr::Value, + location: ValuePointerRef, + ) -> Result { + if value.kind() == deserr::ValueKind::Map { + Ok(Self::Pattern(FilterableAttributesPatterns::deserialize_from_value( + value, location, + )?)) + } else { + Ok(Self::Field(String::deserialize_from_value(value, location)?)) + } + } +} + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)] +pub struct FilterFeatures { + equality: bool, + comparison: bool, +} + +impl FilterFeatures { + pub fn allowed_operators(&self) -> Vec { + if 
!self.is_filterable() { + return vec![]; + } + + let mut operators = vec!["OR", "AND", "NOT"]; + if self.is_filterable_equality() { + operators.extend_from_slice(&["=", "!=", "IN"]); + } + if self.is_filterable_comparison() { + operators.extend_from_slice(&["<", ">", "<=", ">=", "TO"]); + } + if self.is_filterable_empty() { + operators.push("IS EMPTY"); + } + if self.is_filterable_null() { + operators.push("IS NULL"); + } + if self.is_filterable_exists() { + operators.push("EXISTS"); + } + + operators.into_iter().map(String::from).collect() + } + + pub fn is_filterable(&self) -> bool { + self.equality || self.comparison + } + + pub fn is_filterable_equality(&self) -> bool { + self.equality + } + + /// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed + pub fn is_filterable_comparison(&self) -> bool { + self.comparison + } + + /// Check if `IS EMPTY` is allowed + pub fn is_filterable_empty(&self) -> bool { + self.is_filterable() + } + + /// Check if `EXISTS` is allowed + pub fn is_filterable_exists(&self) -> bool { + self.is_filterable() + } + + /// Check if `IS NULL` is allowed + pub fn is_filterable_null(&self) -> bool { + self.is_filterable() + } + + pub fn legacy_default() -> Self { + Self { equality: true, comparison: true } + } + + pub fn no_features() -> Self { + Self { equality: false, comparison: false } + } +} + +impl Default for FilterFeatures { + fn default() -> Self { + Self { equality: true, comparison: false } + } +} diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index c748324ae..d40ddb15d 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -876,11 +876,11 @@ impl Index { /* filterable fields */ - /// Writes the filterable fields names in the database. - pub(crate) fn put_filterable_fields( + /// Writes the filterable attributes rules in the database. 
+ pub(crate) fn put_filterable_attributes_rules( &self, wtxn: &mut RwTxn<'_>, - fields: &HashSet, + #[allow(clippy::ptr_arg)] fields: &Vec, ) -> heed::Result<()> { self.main.remap_types::>().put( wtxn, @@ -889,13 +889,19 @@ impl Index { ) } - /// Deletes the filterable fields ids in the database. - pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result { + /// Deletes the filterable attributes rules in the database. + pub(crate) fn delete_filterable_attributes_rules( + &self, + wtxn: &mut RwTxn<'_>, + ) -> heed::Result { self.main.remap_key_type::().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY) } - /// Returns the filterable fields names. - pub fn filterable_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result> { + /// Returns the filterable attributes rules. + pub fn filterable_attributes_rules( + &self, + rtxn: &RoTxn<'_>, + ) -> heed::Result> { Ok(self .main .remap_types::>() @@ -903,21 +909,6 @@ impl Index { .unwrap_or_default()) } - /// Identical to `filterable_fields`, but returns ids instead. - pub fn filterable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result> { - let fields = self.filterable_fields(rtxn)?; - let fields_ids_map = self.fields_ids_map(rtxn)?; - - let mut fields_ids = HashSet::new(); - for name in fields { - if let Some(field_id) = fields_ids_map.id(&name) { - fields_ids.insert(field_id); - } - } - - Ok(fields_ids) - } - /* sortable fields */ /// Writes the sortable fields names in the database. 
diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index 1d6d04fc7..85540c82e 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -9,12 +9,14 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; pub mod documents; mod asc_desc; +mod attribute_patterns; mod criterion; pub mod database_stats; mod error; mod external_documents_ids; pub mod facet; mod fields_ids_map; +mod filterable_attributes_rules; pub mod heed_codec; pub mod index; mod localized_attributes_rules; @@ -52,6 +54,8 @@ pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbor pub use {charabia as tokenizer, heed, rhai}; pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError}; +pub use self::attribute_patterns::AttributePatterns; +pub use self::attribute_patterns::PatternMatch; pub use self::criterion::{default_criteria, Criterion, CriterionError}; pub use self::error::{ Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError, @@ -59,6 +63,10 @@ pub use self::error::{ pub use self::external_documents_ids::ExternalDocumentsIds; pub use self::fieldids_weights_map::FieldidsWeightsMap; pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap}; +pub use self::filterable_attributes_rules::{ + FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns, + FilterableAttributesRule, +}; pub use self::heed_codec::{ BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, @@ -67,7 +75,6 @@ pub use self::heed_codec::{ }; pub use self::index::Index; pub use self::localized_attributes_rules::LocalizedAttributesRule; -use self::localized_attributes_rules::LocalizedFieldIds; pub use self::search::facet::{FacetValueHit, SearchForFacetValues}; pub use self::search::similar::Similar; pub use self::search::{ diff --git a/crates/milli/src/localized_attributes_rules.rs 
b/crates/milli/src/localized_attributes_rules.rs index 2b9bf099c..81015c458 100644 --- a/crates/milli/src/localized_attributes_rules.rs +++ b/crates/milli/src/localized_attributes_rules.rs @@ -4,8 +4,9 @@ use charabia::Language; use serde::{Deserialize, Serialize}; use utoipa::ToSchema; +use crate::attribute_patterns::PatternMatch; use crate::fields_ids_map::FieldsIdsMap; -use crate::FieldId; +use crate::{AttributePatterns, FieldId}; /// A rule that defines which locales are supported for a given attribute. /// @@ -17,18 +18,18 @@ use crate::FieldId; /// The pattern `*attribute_name*` matches any attribute name that contains `attribute_name`. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] pub struct LocalizedAttributesRule { - pub attribute_patterns: Vec, + pub attribute_patterns: AttributePatterns, #[schema(value_type = Vec)] pub locales: Vec, } impl LocalizedAttributesRule { pub fn new(attribute_patterns: Vec, locales: Vec) -> Self { - Self { attribute_patterns, locales } + Self { attribute_patterns: AttributePatterns::from(attribute_patterns), locales } } - pub fn match_str(&self, str: &str) -> bool { - self.attribute_patterns.iter().any(|pattern| match_pattern(pattern.as_str(), str)) + pub fn match_str(&self, str: &str) -> PatternMatch { + self.attribute_patterns.match_str(str) } pub fn locales(&self) -> &[Language] { @@ -36,20 +37,6 @@ impl LocalizedAttributesRule { } } -fn match_pattern(pattern: &str, str: &str) -> bool { - if pattern == "*" { - true - } else if pattern.starts_with('*') && pattern.ends_with('*') { - str.contains(&pattern[1..pattern.len() - 1]) - } else if let Some(pattern) = pattern.strip_prefix('*') { - str.ends_with(pattern) - } else if let Some(pattern) = pattern.strip_suffix('*') { - str.starts_with(pattern) - } else { - pattern == str - } -} - #[derive(Debug, Clone, PartialEq, Eq)] pub struct LocalizedFieldIds { field_id_to_locales: HashMap>, @@ -65,13 +52,13 @@ impl LocalizedFieldIds { if let Some(rules) = 
rules { let fields = fields_ids.filter_map(|field_id| { - fields_ids_map.name(field_id).map(|field_name| (field_id, field_name)) + fields_ids_map.name(field_id).map(|field_name: &str| (field_id, field_name)) }); for (field_id, field_name) in fields { let mut locales = Vec::new(); for rule in rules { - if rule.match_str(field_name) { + if rule.match_str(field_name) == PatternMatch::Match { locales.extend(rule.locales.iter()); // Take the first rule that matches break; @@ -89,10 +76,6 @@ impl LocalizedFieldIds { Self { field_id_to_locales } } - pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> { - self.field_id_to_locales.get(&fields_id).map(Vec::as_slice) - } - pub fn all_locales(&self) -> Vec { let mut locales = Vec::new(); for field_locales in self.field_id_to_locales.values() { @@ -108,24 +91,3 @@ impl LocalizedFieldIds { locales } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_match_pattern() { - assert!(match_pattern("*", "test")); - assert!(match_pattern("test*", "test")); - assert!(match_pattern("test*", "testa")); - assert!(match_pattern("*test", "test")); - assert!(match_pattern("*test", "atest")); - assert!(match_pattern("*test*", "test")); - assert!(match_pattern("*test*", "atesta")); - assert!(match_pattern("*test*", "atest")); - assert!(match_pattern("*test*", "testa")); - assert!(!match_pattern("test*test", "test")); - assert!(!match_pattern("*test", "testa")); - assert!(!match_pattern("test*", "atest")); - } -}