diff --git a/Cargo.lock b/Cargo.lock index 8a0220c3e..fda5f2493 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" -version = "1.5.0" +version = "1.5.1" dependencies = [ "anyhow", "bytes", @@ -1269,7 +1269,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.5.0" +version = "1.5.1" dependencies = [ "anyhow", "big_s", @@ -1486,7 +1486,7 @@ dependencies = [ [[package]] name = "file-store" -version = "1.5.0" +version = "1.5.1" dependencies = [ "faux", "tempfile", @@ -1508,7 +1508,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.5.0" +version = "1.5.1" dependencies = [ "insta", "nom", @@ -1539,7 +1539,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.5.0" +version = "1.5.1" dependencies = [ "criterion", "serde_json", @@ -1657,7 +1657,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.5.0" +version = "1.5.1" dependencies = [ "arbitrary", "clap", @@ -2457,7 +2457,7 @@ dependencies = [ [[package]] name = "index-scheduler" -version = "1.5.0" +version = "1.5.1" dependencies = [ "anyhow", "big_s", @@ -2654,7 +2654,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.5.0" +version = "1.5.1" dependencies = [ "criterion", "serde_json", @@ -3083,7 +3083,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.5.0" +version = "1.5.1" dependencies = [ "insta", "md5", @@ -3092,7 +3092,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.5.0" +version = "1.5.1" dependencies = [ "actix-cors", "actix-http", @@ -3182,7 +3182,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.5.0" +version = "1.5.1" dependencies = [ "base64 0.21.2", "enum-iterator", @@ -3201,7 +3201,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.5.0" +version = "1.5.1" dependencies = [ "actix-web", "anyhow", @@ -3231,7 +3231,7 @@ dependencies = [ [[package]] name = "meilitool" -version = "1.5.0" +version = "1.5.1" dependencies = [ "anyhow", "clap", @@ -3269,7 +3269,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.5.0" +version = "1.5.1" dependencies = [ "big_s", "bimap", @@ -3592,7 +3592,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "permissive-json-pointer" -version = "1.5.0" +version = "1.5.1" dependencies = [ "big_s", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 7b8fab8e1..fdaf00cfe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ members = [ ] [workspace.package] -version = "1.5.0" +version = "1.5.1" authors = ["Quentin de Quelen ", "Clément Renault "] description = "Meilisearch HTTP server" homepage = "https://meilisearch.com" diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs index cfdff95ee..7bbdca38f 100644 --- a/meilisearch/tests/search/restrict_searchable.rs +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -335,3 +335,35 @@ async fn exactness_ranking_rule_order() { }) .await; } + +#[actix_rt::test] +async fn search_on_exact_field() { + let server = Server::new().await; + let index = index_with_documents( + &server, + &json!([ + { + "title": "Captain Marvel", + "exact": "Captain Marivel", + "id": "1", + }, + { + "title": "Captain Marivel", + "exact": "Captain the Marvel", + "id": "2", + }]), + ) + .await; + + let (response, code) = + index.update_settings_typo_tolerance(json!({ "disableOnAttributes": ["exact"] })).await; + assert_eq!(202, code, "{:?}", response); + index.wait_task(1).await; + // Searching on an exact attribute should only return the document matching without typo. + index + .search(json!({"q": "Marvel", "attributesToSearchOn": ["exact"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(response["hits"].as_array().unwrap().len(), @"1"); + }) + .await; +} diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 051e366d0..76948f1ed 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -162,7 +162,8 @@ impl<'ctx> SearchContext<'ctx> { match &self.restricted_fids { Some(restricted_fids) => { let interned = self.word_interner.get(word).as_str(); - let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + let keys: Vec<_> = + restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect(); DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( self.txn, @@ -187,13 +188,29 @@ impl<'ctx> SearchContext<'ctx> { &mut self, word: Interned, ) -> Result> { - DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( - self.txn, - word, - self.word_interner.get(word).as_str(), - &mut self.db_cache.exact_word_docids, - self.index.exact_word_docids.remap_data_type::(), - ) + match &self.restricted_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(word).as_str(); + let keys: Vec<_> = + restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect(); + + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + &keys[..], + &mut self.db_cache.exact_word_docids, + self.index.word_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + self.word_interner.get(word).as_str(), + &mut self.db_cache.exact_word_docids, + self.index.exact_word_docids.remap_data_type::(), + ), + } } pub fn word_prefix_docids(&mut self, prefix: Word) -> Result> { @@ -224,7 +241,8 @@ impl<'ctx> SearchContext<'ctx> { match &self.restricted_fids { Some(restricted_fids) => { let interned = self.word_interner.get(prefix).as_str(); - let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + let keys: Vec<_> = + restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect(); DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( self.txn, @@ -249,13 +267,29 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( - self.txn, - prefix, - self.word_interner.get(prefix).as_str(), - &mut self.db_cache.exact_word_prefix_docids, - self.index.exact_word_prefix_docids.remap_data_type::(), - ) + match &self.restricted_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(prefix).as_str(); + let keys: Vec<_> = + restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect(); + + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + &keys[..], + &mut self.db_cache.exact_word_prefix_docids, + self.index.word_prefix_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + self.word_interner.get(prefix).as_str(), + &mut self.db_cache.exact_word_prefix_docids, + self.index.exact_word_prefix_docids.remap_data_type::(), + ), + } } pub fn get_db_word_pair_proximity_docids( diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index eaf55ccbb..a1b5da4e8 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -50,7 +50,9 @@ use crate::distance::NDotProductPoint; use crate::error::FieldIdMapMissingEntry; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; -use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; +use crate::{ + AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, +}; /// A structure used throughout the execution of a search query. pub struct SearchContext<'ctx> { @@ -61,7 +63,7 @@ pub struct SearchContext<'ctx> { pub phrase_interner: DedupInterner, pub term_interner: Interner, pub phrase_docids: PhraseDocIdsCache, - pub restricted_fids: Option>, + pub restricted_fids: Option, } impl<'ctx> SearchContext<'ctx> { @@ -81,8 +83,9 @@ impl<'ctx> SearchContext<'ctx> { pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { let fids_map = self.index.fields_ids_map(self.txn)?; let searchable_names = self.index.searchable_fields(self.txn)?; + let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?; - let mut restricted_fids = Vec::new(); + let mut restricted_fids = RestrictedFids::default(); let mut contains_wildcard = false; for field_name in searchable_attributes { if field_name == "*" { @@ -121,7 +124,11 @@ impl<'ctx> SearchContext<'ctx> { } }; - restricted_fids.push(fid); + if exact_attributes_ids.contains(&fid) { + restricted_fids.exact.push(fid); + } else { + restricted_fids.tolerant.push(fid); + }; } self.restricted_fids = (!contains_wildcard).then_some(restricted_fids); @@ -145,6 +152,18 @@ impl Word { } } +#[derive(Debug, Clone, Default)] +pub struct RestrictedFids { + pub tolerant: Vec, + pub exact: Vec, +} + +impl RestrictedFids { + pub fn contains(&self, fid: &FieldId) -> bool { + self.tolerant.contains(fid) || self.exact.contains(fid) + } +} + /// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it. fn resolve_maximally_reduced_query_graph( ctx: &mut SearchContext,