diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index 4bd5315ff..9938fca26 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -12,7 +12,7 @@ use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; -use milli::Index; +use milli::{FilterableAttributesRule, Index}; use rand::seq::SliceRandom; use rand_chacha::rand_core::SeedableRng; use roaring::RoaringBitmap; @@ -57,7 +57,8 @@ fn setup_settings<'t>( let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); - let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect(); + let filterable_fields = + filterable_fields.iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect(); builder.set_filterable_fields(filterable_fields); let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect(); diff --git a/crates/benchmarks/benches/search_geo.rs b/crates/benchmarks/benches/search_geo.rs index 72503ce57..d76929f99 100644 --- a/crates/benchmarks/benches/search_geo.rs +++ b/crates/benchmarks/benches/search_geo.rs @@ -2,7 +2,7 @@ mod datasets_paths; mod utils; use criterion::{criterion_group, criterion_main}; -use milli::update::Settings; +use milli::{update::Settings, FilterableAttributesRule}; use utils::Conf; #[cfg(not(windows))] @@ -21,8 +21,10 @@ fn base_conf(builder: &mut Settings) { ["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); - let filterable_fields = - ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect(); + let filterable_fields = ["_geo", "population", "elevation"] + .iter() + .map(|s| FilterableAttributesRule::Field(s.to_string())) + .collect(); builder.set_filterable_fields(filterable_fields); let sortable_fields = diff --git a/crates/benchmarks/benches/search_songs.rs b/crates/benchmarks/benches/search_songs.rs index bef014a0e..680a675ef 100644 --- a/crates/benchmarks/benches/search_songs.rs +++ b/crates/benchmarks/benches/search_songs.rs @@ -2,7 +2,7 @@ mod datasets_paths; mod utils; use criterion::{criterion_group, criterion_main}; -use milli::update::Settings; +use milli::{update::Settings, FilterableAttributesRule}; use utils::Conf; #[cfg(not(windows))] @@ -22,7 +22,7 @@ fn base_conf(builder: &mut Settings) { let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"] .iter() - .map(|s| s.to_string()) + .map(|s| FilterableAttributesRule::Field(s.to_string())) .collect(); builder.set_filterable_fields(faceted_fields); } diff --git a/crates/dump/src/lib.rs b/crates/dump/src/lib.rs index e7fd22333..4e2d6ac2f 100644 --- a/crates/dump/src/lib.rs +++ b/crates/dump/src/lib.rs @@ -233,8 +233,8 @@ pub(crate) mod test { use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures}; use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::keys::{Action, Key}; - use meilisearch_types::milli; use meilisearch_types::milli::update::Setting; + use meilisearch_types::milli::{self, FilterableAttributesRule}; use meilisearch_types::settings::{Checked, FacetingSettings, Settings}; use meilisearch_types::task_view::DetailsView; use meilisearch_types::tasks::{Details, Kind, Status}; @@ -279,7 +279,10 @@ pub(crate) mod test { let settings = Settings { displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(), searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(), - filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }), + filterable_attributes: Setting::Set(vec![ + FilterableAttributesRule::Field(S("race")), + FilterableAttributesRule::Field(S("age")), + ]), sortable_attributes: Setting::Set(btreeset! { S("age") }), ranking_rules: Setting::NotSet, stop_words: Setting::NotSet, diff --git a/crates/index-scheduler/src/scheduler/test_failure.rs b/crates/index-scheduler/src/scheduler/test_failure.rs index 5cdcb248b..191910d38 100644 --- a/crates/index-scheduler/src/scheduler/test_failure.rs +++ b/crates/index-scheduler/src/scheduler/test_failure.rs @@ -1,11 +1,11 @@ use std::time::Instant; use big_s::S; -use maplit::btreeset; use meili_snap::snapshot; use meilisearch_types::milli::obkv_to_json; use meilisearch_types::milli::update::IndexDocumentsMethod::*; use meilisearch_types::milli::update::Setting; +use meilisearch_types::milli::FilterableAttributesRule; use meilisearch_types::tasks::{Kind, KindWithContent}; use crate::insta_snapshot::snapshot_index_scheduler; @@ -127,7 +127,8 @@ fn fail_in_process_batch_for_document_deletion() { use meilisearch_types::settings::{Settings, Unchecked}; let mut new_settings: Box> = Box::default(); - new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto"))); + new_settings.filterable_attributes = + Setting::Set(vec![FilterableAttributesRule::Field(S("catto"))]); index_scheduler .register( diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 75f4a8c17..12b98b729 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1766,7 +1766,7 @@ pub(crate) mod tests { use big_s::S; use bumpalo::Bump; use heed::{EnvOpenOptions, RwTxn}; - use maplit::{btreemap, hashset}; + use maplit::btreemap; use memmap2::Mmap; use tempfile::TempDir; @@ -1782,7 +1782,8 @@ pub(crate) mod tests { use crate::vector::settings::{EmbedderSource, EmbeddingSettings}; use crate::vector::EmbeddingConfigs; use crate::{ - db_snap, obkv_to_json, Filter, Index, Search, SearchResult, ThreadPoolNoAbortBuilder, + db_snap, obkv_to_json, Filter, FilterableAttributesRule, Index, Search, SearchResult, + ThreadPoolNoAbortBuilder, }; pub(crate) struct TempIndex { @@ -2189,7 +2190,7 @@ pub(crate) mod tests { let rtxn = index.read_txn().unwrap(); let real = index.searchable_fields(&rtxn).unwrap(); - assert_eq!(real, &["doggo", "name"]); + assert!(real.is_empty()); let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap(); assert_eq!(user_defined, &["doggo", "name"]); @@ -2217,7 +2218,9 @@ pub(crate) mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + RESERVED_GEO_FIELD_NAME.to_string(), + )]); }) .unwrap(); index @@ -2325,7 +2328,9 @@ pub(crate) mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("doggo") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "doggo".to_string(), + )]); }) .unwrap(); index @@ -2362,15 +2367,14 @@ pub(crate) mod tests { #[test] fn replace_documents_external_ids_and_soft_deletion_check() { - use big_s::S; - use maplit::hashset; - let index = TempIndex::new(); index .update_settings(|settings| { settings.set_primary_key("id".to_owned()); - settings.set_filterable_fields(hashset! { S("doggo") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "doggo".to_string(), + )]); }) .unwrap(); @@ -2903,8 +2907,9 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_string()); - settings - .set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()])); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + RESERVED_GEO_FIELD_NAME.to_string(), + )]); }) .unwrap(); @@ -2938,8 +2943,9 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_string()); - settings - .set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()])); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + RESERVED_GEO_FIELD_NAME.to_string(), + )]); }) .unwrap(); diff --git a/crates/milli/src/search/facet/facet_distribution.rs b/crates/milli/src/search/facet/facet_distribution.rs index b165d4e80..beb5d2568 100644 --- a/crates/milli/src/search/facet/facet_distribution.rs +++ b/crates/milli/src/search/facet/facet_distribution.rs @@ -382,7 +382,7 @@ impl<'a> FacetDistribution<'a> { ) -> Result<()> { let mut invalid_facets = BTreeSet::new(); if let Some(facets) = &self.facets { - for (field, _) in facets { + for field in facets.keys() { let is_valid_faceted_field = fields_ids_map.id_with_metadata(field).map_or(false, |(_, metadata)| { metadata.is_faceted(filterable_attributes_rules) @@ -439,11 +439,10 @@ mod tests { use std::iter; use big_s::S; - use maplit::hashset; use crate::documents::mmap_from_objects; use crate::index::tests::TempIndex; - use crate::{milli_snap, FacetDistribution, OrderBy}; + use crate::{milli_snap, FacetDistribution, FilterableAttributesRule, OrderBy}; #[test] fn few_candidates_few_facet_values() { @@ -453,7 +452,9 @@ mod tests { let index = TempIndex::new(); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let documents = documents!([ @@ -524,7 +525,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"]; @@ -609,7 +612,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::>(); @@ -668,7 +673,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).collect::>(); @@ -719,7 +726,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).collect::>(); @@ -770,7 +779,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).collect::>(); @@ -821,7 +832,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).collect::>(); diff --git a/crates/milli/src/search/new/tests/cutoff.rs b/crates/milli/src/search/new/tests/cutoff.rs index 63b67f2e7..f2dfb45d6 100644 --- a/crates/milli/src/search/new/tests/cutoff.rs +++ b/crates/milli/src/search/new/tests/cutoff.rs @@ -5,13 +5,11 @@ use std::time::Duration; -use big_s::S; -use maplit::hashset; use meili_snap::snapshot; use crate::index::tests::TempIndex; use crate::score_details::{ScoreDetails, ScoringStrategy}; -use crate::{Criterion, Filter, Search, TimeBudget}; +use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget}; fn create_index() -> TempIndex { let index = TempIndex::new(); @@ -20,7 +18,7 @@ fn create_index() -> TempIndex { .update_settings(|s| { s.set_primary_key("id".to_owned()); s.set_searchable_fields(vec!["text".to_owned()]); - s.set_filterable_fields(hashset! { S("id") }); + s.set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_owned())]); s.set_criteria(vec![Criterion::Words, Criterion::Typo]); }) .unwrap(); diff --git a/crates/milli/src/search/new/tests/distinct.rs b/crates/milli/src/search/new/tests/distinct.rs index dd27bfc8a..d3c453957 100644 --- a/crates/milli/src/search/new/tests/distinct.rs +++ b/crates/milli/src/search/new/tests/distinct.rs @@ -19,7 +19,10 @@ use maplit::hashset; use super::collect_field_values; use crate::index::tests::TempIndex; -use crate::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy}; +use crate::{ + AscDesc, Criterion, FilterableAttributesRule, Index, Member, Search, SearchResult, + TermsMatchingStrategy, +}; fn create_index() -> TempIndex { let index = TempIndex::new(); @@ -236,7 +239,7 @@ fn test_distinct_placeholder_no_ranking_rules() { // Set the letter as filterable and unset the distinct attribute. index .update_settings(|s| { - s.set_filterable_fields(hashset! { S("letter") }); + s.set_filterable_fields(vec![FilterableAttributesRule::Field("letter".to_owned())]); s.reset_distinct_field(); }) .unwrap(); diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index e60a09ec5..e718eb39d 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -9,7 +9,7 @@ use crate::progress::Progress; use crate::update::new::indexer; use crate::update::{IndexerConfig, Settings}; use crate::vector::EmbeddingConfigs; -use crate::{db_snap, Criterion, Index}; +use crate::{db_snap, Criterion, FilterableAttributesRule, Index}; pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson"); use crate::constants::RESERVED_GEO_FIELD_NAME; @@ -25,14 +25,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(criteria.to_vec()); - builder.set_filterable_fields(hashset! { - S("tag"), - S("asc_desc_rank"), - S(RESERVED_GEO_FIELD_NAME), - S("opt1"), - S("opt1.opt2"), - S("tag_in") - }); + builder.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S("tag")), + FilterableAttributesRule::Field(S("asc_desc_rank")), + FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)), + FilterableAttributesRule::Field(S("opt1")), + FilterableAttributesRule::Field(S("opt1.opt2")), + FilterableAttributesRule::Field(S("tag_in")), + ]); builder.set_sortable_fields(hashset! { S("tag"), S("asc_desc_rank"), diff --git a/crates/milli/src/snapshot_tests.rs b/crates/milli/src/snapshot_tests.rs index 6635ab2f4..3e58c44d9 100644 --- a/crates/milli/src/snapshot_tests.rs +++ b/crates/milli/src/snapshot_tests.rs @@ -386,7 +386,7 @@ pub fn snap_settings(index: &Index) -> String { write_setting_to_snap!(criteria); write_setting_to_snap!(displayed_fields); write_setting_to_snap!(distinct_field); - write_setting_to_snap!(filterable_fields); + write_setting_to_snap!(filterable_attributes_rules); write_setting_to_snap!(sortable_fields); write_setting_to_snap!(synonyms); write_setting_to_snap!(authorize_typos); diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index d38fdf138..42f38ea0a 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -1876,7 +1876,7 @@ pub fn validate_embedding_settings( mod tests { use big_s::S; use heed::types::Bytes; - use maplit::{btreemap, btreeset, hashset}; + use maplit::{btreemap, btreeset}; use meili_snap::snapshot; use super::*; @@ -2086,7 +2086,9 @@ mod tests { // Set the filterable fields to be the age. index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("age") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "age".to_string(), + )]); }) .unwrap(); @@ -2101,8 +2103,8 @@ mod tests { // Check that the displayed fields are correctly set. let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashset! { S("age") }); + let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap(); + assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("age".to_string(),)]); // Only count the field_id 0 and level 0 facet values. // TODO we must support typed CSVs for numbers to be understood. let fidmap = index.fields_ids_map(&rtxn).unwrap(); @@ -2144,14 +2146,23 @@ mod tests { // Set the filterable fields to be the age and the name. index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("age"), S("name") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field("age".to_string()), + FilterableAttributesRule::Field("name".to_string()), + ]); }) .unwrap(); // Check that the displayed fields are correctly set. let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashset! { S("age"), S("name") }); + let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap(); + assert_eq!( + fields_ids, + vec![ + FilterableAttributesRule::Field("age".to_string()), + FilterableAttributesRule::Field("name".to_string()), + ] + ); let rtxn = index.read_txn().unwrap(); // Only count the field_id 2 and level 0 facet values. @@ -2176,14 +2187,16 @@ mod tests { // Remove the age from the filterable fields. index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("name") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "name".to_string(), + )]); }) .unwrap(); // Check that the displayed fields are correctly set. let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashset! { S("name") }); + let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap(); + assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("name".to_string())]); let rtxn = index.read_txn().unwrap(); // Only count the field_id 2 and level 0 facet values. @@ -2513,7 +2526,10 @@ mod tests { index .update_settings(|settings| { settings.set_displayed_fields(vec!["hello".to_string()]); - settings.set_filterable_fields(hashset! { S("age"), S("toto") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field("age".to_string()), + FilterableAttributesRule::Field("toto".to_string()), + ]); settings.set_criteria(vec![Criterion::Asc(S("toto"))]); }) .unwrap(); @@ -2630,7 +2646,9 @@ mod tests { // Set the genres setting index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("genres") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "genres".to_string(), + )]); }) .unwrap(); diff --git a/crates/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs index 4d8bf324c..c5a61da9f 100644 --- a/crates/milli/tests/search/facet_distribution.rs +++ b/crates/milli/tests/search/facet_distribution.rs @@ -1,13 +1,12 @@ use big_s::S; use bumpalo::Bump; use heed::EnvOpenOptions; -use maplit::hashset; use milli::documents::mmap_from_objects; use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; -use milli::{FacetDistribution, Index, Object, OrderBy}; +use milli::{FacetDistribution, FilterableAttributesRule, Index, Object, OrderBy}; use serde_json::{from_value, json}; #[test] @@ -21,10 +20,10 @@ fn test_facet_distribution_with_no_facet_values() { let config = IndexerConfig::default(); let mut builder = Settings::new(&mut wtxn, &index, &config); - builder.set_filterable_fields(hashset! { - S("genres"), - S("tags"), - }); + builder.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S("genres")), + FilterableAttributesRule::Field(S("tags")), + ]); builder.execute(|_| (), || false).unwrap(); wtxn.commit().unwrap(); diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs index 337a4c88c..72b124219 100644 --- a/crates/milli/tests/search/mod.rs +++ b/crates/milli/tests/search/mod.rs @@ -11,7 +11,9 @@ use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; -use milli::{AscDesc, Criterion, DocumentId, Index, Member, TermsMatchingStrategy}; +use milli::{ + AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member, TermsMatchingStrategy, +}; use serde::{Deserialize, Deserializer}; use slice_group_by::GroupBy; @@ -42,14 +44,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(criteria.to_vec()); - builder.set_filterable_fields(hashset! { - S("tag"), - S("asc_desc_rank"), - S("_geo"), - S("opt1"), - S("opt1.opt2"), - S("tag_in") - }); + builder.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S("tag")), + FilterableAttributesRule::Field(S("asc_desc_rank")), + FilterableAttributesRule::Field(S("_geo")), + FilterableAttributesRule::Field(S("opt1")), + FilterableAttributesRule::Field(S("opt1.opt2")), + FilterableAttributesRule::Field(S("tag_in")), + ]); builder.set_sortable_fields(hashset! { S("tag"), S("asc_desc_rank"),