From a3716c567886905acc15fe79141f6b86472165db Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 31 May 2023 14:57:46 +0200 Subject: [PATCH 01/13] add the new parameter to the search builder of milli --- milli/src/search/mod.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 3c972d9b0..9cdada837 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -28,6 +28,7 @@ pub struct Search<'a> { offset: usize, limit: usize, sort_criteria: Option>, + searchable_attributes: Option>, geo_strategy: new::GeoSortStrategy, terms_matching_strategy: TermsMatchingStrategy, scoring_strategy: ScoringStrategy, @@ -45,6 +46,7 @@ impl<'a> Search<'a> { offset: 0, limit: 20, sort_criteria: None, + searchable_attributes: None, geo_strategy: new::GeoSortStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(), scoring_strategy: Default::default(), @@ -75,6 +77,11 @@ impl<'a> Search<'a> { self } + pub fn searchable_attributes(&mut self, searchable: Vec) -> &mut Search<'a> { + self.searchable_attributes = Some(searchable); + self + } + pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> { self.terms_matching_strategy = value; self @@ -145,6 +152,7 @@ impl fmt::Debug for Search<'_> { offset, limit, sort_criteria, + searchable_attributes, geo_strategy: _, terms_matching_strategy, scoring_strategy, @@ -159,6 +167,7 @@ impl fmt::Debug for Search<'_> { .field("offset", offset) .field("limit", limit) .field("sort_criteria", sort_criteria) + .field("searchable_attributes", searchable_attributes) .field("terms_matching_strategy", terms_matching_strategy) .field("scoring_strategy", scoring_strategy) .field("exhaustive_number_hits", exhaustive_number_hits) From 461b5118bd97b18797ccdf6cdad55e2205e3ffc1 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 7 Jun 2023 11:24:40 +0200 Subject: [PATCH 02/13] Add API search setting --- meilisearch-types/src/error.rs | 1 + meilisearch/src/routes/indexes/search.rs | 5 +++++ meilisearch/src/search.rs | 10 ++++++++++ milli/examples/search.rs | 1 + milli/src/search/mod.rs | 5 +++-- milli/src/search/new/matches/mod.rs | 1 + milli/src/search/new/mod.rs | 1 + 7 files changed, 22 insertions(+), 2 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 3e08498de..cb6f78c94 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -224,6 +224,7 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; +InvalidRestrictSearchableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index cb70147cd..db0aedda5 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -70,6 +70,8 @@ pub struct SearchQueryGet { crop_marker: String, #[deserr(default, error = DeserrQueryParamError)] matching_strategy: MatchingStrategy, + #[deserr(default, error = DeserrQueryParamError)] + pub restrict_searchable_attributes: Option>, } impl From for SearchQuery { @@ -102,6 +104,9 @@ impl From for SearchQuery { highlight_post_tag: other.highlight_post_tag, crop_marker: other.crop_marker, matching_strategy: other.matching_strategy, + restrict_searchable_attributes: other + .restrict_searchable_attributes + .map(|o| o.into_iter().collect()), } } } diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index b2858ead3..3a91ea655 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -73,6 +73,8 @@ pub struct SearchQuery { pub crop_marker: String, #[deserr(default, error = DeserrJsonError, default)] pub matching_strategy: MatchingStrategy, + #[deserr(default, error = DeserrJsonError, default)] + pub restrict_searchable_attributes: Option>, } impl SearchQuery { @@ -128,6 +130,8 @@ pub struct SearchQueryWithIndex { pub crop_marker: String, #[deserr(default, error = DeserrJsonError, default)] pub matching_strategy: MatchingStrategy, + #[deserr(default, error = DeserrJsonError, default)] + pub restrict_searchable_attributes: Option>, } impl SearchQueryWithIndex { @@ -153,6 +157,7 @@ impl SearchQueryWithIndex { highlight_post_tag, crop_marker, matching_strategy, + restrict_searchable_attributes, } = self; ( index_uid, @@ -176,6 +181,7 @@ impl SearchQueryWithIndex { highlight_post_tag, crop_marker, matching_strategy, + restrict_searchable_attributes, // do not use ..Default::default() here, // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` }, @@ -291,6 +297,10 @@ pub fn perform_search( search.query(query); } + if let Some(ref searchable) = query.restrict_searchable_attributes { + search.searchable_attributes(searchable); + } + let is_finite_pagination = query.is_finite_pagination(); search.terms_matching_strategy(query.matching_strategy.into()); diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 87c9a004d..829cb6244 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -57,6 +57,7 @@ fn main() -> Result<(), Box> { false, &None, &None, + None, GeoSortStrategy::default(), 0, 20, diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 9cdada837..2f97143cc 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -28,7 +28,7 @@ pub struct Search<'a> { offset: usize, limit: usize, sort_criteria: Option>, - searchable_attributes: Option>, + searchable_attributes: Option<&'a [String]>, geo_strategy: new::GeoSortStrategy, terms_matching_strategy: TermsMatchingStrategy, scoring_strategy: ScoringStrategy, @@ -77,7 +77,7 @@ impl<'a> Search<'a> { self } - pub fn searchable_attributes(&mut self, searchable: Vec) -> &mut Search<'a> { + pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> { self.searchable_attributes = Some(searchable); self } @@ -126,6 +126,7 @@ impl<'a> Search<'a> { self.exhaustive_number_hits, &self.filter, &self.sort_criteria, + self.searchable_attributes, self.geo_strategy, self.offset, self.limit, diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index f33d595e5..d70bb1550 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -514,6 +514,7 @@ mod tests { false, &None, &None, + None, crate::search::new::GeoSortStrategy::default(), 0, 100, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 8df764f29..e72093bf8 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -355,6 +355,7 @@ pub fn execute_search( exhaustive_number_hits: bool, filters: &Option, sort_criteria: &Option>, + searchable_attributes: Option<&[String]>, geo_strategy: geo_sort::Strategy, from: usize, length: usize, From a61ca4066edbd752a44b7112628f65f027802ba7 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 7 Jun 2023 18:49:17 +0200 Subject: [PATCH 03/13] Add tests --- meilisearch/tests/search/mod.rs | 1 + .../tests/search/restrict_searchable.rs | 297 ++++++++++++++++++ 2 files changed, 298 insertions(+) create mode 100644 meilisearch/tests/search/restrict_searchable.rs diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 045cfde2c..6a55c7569 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -5,6 +5,7 @@ mod errors; mod formatted; mod multi; mod pagination; +mod restrict_searchable; use once_cell::sync::Lazy; use serde_json::{json, Value}; diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs new file mode 100644 index 000000000..dda38560a --- /dev/null +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -0,0 +1,297 @@ +use once_cell::sync::Lazy; +use serde_json::{json, Value}; + +use crate::common::index::Index; +use crate::common::Server; + +async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> { + let index = server.index("test"); + + index.add_documents(documents.clone(), None).await; + index.wait_task(0).await; + index +} + +static SIMPLE_SEARCH_DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "title": "Shazam!", + "desc": "a Captain Marvel ersatz", + "id": "1", + }, + { + "title": "Captain Planet", + "desc": "He's not part of the Marvel Cinematic Universe", + "id": "2", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "3", + }]) +}); + +#[actix_rt::test] +async fn simple_search_on_title() { + let server = Server::new().await; + let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + + // simple search should return 2 documents (ids: 2 and 3). + index + .search( + json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"]}), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 2); + }, + ) + .await; +} + +#[actix_rt::test] +async fn simple_prefix_search_on_title() { + let server = Server::new().await; + let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + + // simple search should return 2 documents (ids: 2 and 3). + index + .search( + json!({"q": "Captain Mar", "restrictSearchableAttributes": ["title"]}), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 2); + }, + ) + .await; +} + +#[actix_rt::test] +async fn simple_search_on_title_matching_strategy_all() { + let server = Server::new().await; + let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + // simple search matching strategy all should only return 1 document (ids: 2). + index + .search(json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "matchingStrategy": "all"}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 1); + }) + .await; +} + +#[actix_rt::test] +async fn simple_search_on_unknown_field() { + let server = Server::new().await; + let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + // simple search on unknown field shouldn't return any document. + index + .search( + json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["unknown"]}), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 0); + }, + ) + .await; +} + +#[actix_rt::test] +async fn simple_search_on_no_field() { + let server = Server::new().await; + let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + // simple search on no field shouldn't return any document. + index + .search( + json!({"q": "Captain Marvel", "restrictSearchableAttributes": []}), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 0); + }, + ) + .await; +} + +#[actix_rt::test] +async fn word_ranking_rule_order() { + let server = Server::new().await; + let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + + // Document 3 should appear before document 2. + index + .search( + json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "attributesToRetrieve": ["id"]}), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"], + json!([ + {"id": "3"}, + {"id": "2"}, + ]) + ); + }, + ) + .await; +} + +#[actix_rt::test] +async fn word_ranking_rule_order_exact_words() { + let server = Server::new().await; + let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + index.update_settings_typo_tolerance(json!({"disableOnWords": ["Captain", "Marvel"]})).await; + index.wait_task(1).await; + + // simple search should return 2 documents (ids: 2 and 3). + index + .search( + json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "attributesToRetrieve": ["id"]}), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"], + json!([ + {"id": "3"}, + {"id": "2"}, + ]) + ); + }, + ) + .await; +} + +#[actix_rt::test] +async fn typo_ranking_rule_order() { + let server = Server::new().await; + let index = index_with_documents( + &server, + &json!([ + { + "title": "Capitain Marivel", + "desc": "Captain Marvel", + "id": "1", + }, + { + "title": "Captain Marivel", + "desc": "a Shazam ersatz", + "id": "2", + }]), + ) + .await; + + // Document 2 should appear before document 1. + index + .search(json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"], + json!([ + {"id": "2"}, + {"id": "1"}, + ]) + ); + }) + .await; +} + +#[actix_rt::test] +async fn proximity_ranking_rule_order() { + let server = Server::new().await; + let index = index_with_documents( + &server, + &json!([ + { + "title": "Captain super mega cool. A Marvel story", + "desc": "Captain Marvel", + "id": "1", + }, + { + "title": "Captain America from Marvel", + "desc": "a Shazam ersatz", + "id": "2", + }]), + ) + .await; + + // Document 2 should appear before document 1. + index + .search(json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"], + json!([ + {"id": "2"}, + {"id": "1"}, + ]) + ); + }) + .await; +} + +#[actix_rt::test] +async fn attributes_ranking_rule_order() { + let server = Server::new().await; + let index = index_with_documents( + &server, + &json!([ + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "footer": "The story of Captain Marvel", + "id": "1", + }, + { + "title": "The Avengers", + "desc": "Captain Marvel is far from the earth", + "footer": "A super hero team", + "id": "2", + }]), + ) + .await; + + // Document 2 should appear before document 1. + index + .search(json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["desc", "footer"], "attributesToRetrieve": ["id"]}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"], + json!([ + {"id": "2"}, + {"id": "1"}, + ]) + ); + }) + .await; +} + +#[actix_rt::test] +async fn exactness_ranking_rule_order() { + let server = Server::new().await; + let index = index_with_documents( + &server, + &json!([ + { + "title": "Captain Marvel", + "desc": "Captain Marivel", + "id": "1", + }, + { + "title": "Captain Marvel", + "desc": "CaptainMarvel", + "id": "2", + }]), + ) + .await; + + // Document 2 should appear before document 1. + index + .search(json!({"q": "Captain Marvel", "attributesToRetrieve": ["id"], "restrictSearchableAttributes": ["desc"]}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"], + json!([ + {"id": "2"}, + {"id": "1"}, + ]) + ); + }) + .await; +} From 9680e1e41f4389bea2c67d1d9a05f248f5db76f3 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 13 Jun 2023 14:41:53 +0200 Subject: [PATCH 04/13] Introduce a BytesDecodeOwned trait in heed_codecs --- milli/src/heed_codec/mod.rs | 6 ++++++ .../roaring_bitmap/bo_roaring_bitmap_codec.rs | 13 ++++++++++++- .../roaring_bitmap/cbo_roaring_bitmap_codec.rs | 10 ++++++++++ .../roaring_bitmap/roaring_bitmap_codec.rs | 10 ++++++++++ .../bo_roaring_bitmap_len_codec.rs | 14 +++++++++++++- .../cbo_roaring_bitmap_len_codec.rs | 13 ++++++++++++- .../roaring_bitmap_len_codec.rs | 10 ++++++++++ 7 files changed, 73 insertions(+), 3 deletions(-) diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs index de2644e11..c54168a36 100644 --- a/milli/src/heed_codec/mod.rs +++ b/milli/src/heed_codec/mod.rs @@ -23,3 +23,9 @@ pub use self::roaring_bitmap_length::{ pub use self::script_language_codec::ScriptLanguageCodec; pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec}; pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; + +pub trait BytesDecodeOwned { + type DItem; + + fn bytes_decode_owned(bytes: &[u8]) -> Option; +} diff --git a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs index 994e23b39..9ad2e9707 100644 --- a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs @@ -2,8 +2,11 @@ use std::borrow::Cow; use std::convert::TryInto; use std::mem::size_of; +use heed::BytesDecode; use roaring::RoaringBitmap; +use crate::heed_codec::BytesDecodeOwned; + pub struct BoRoaringBitmapCodec; impl BoRoaringBitmapCodec { @@ -13,7 +16,7 @@ impl BoRoaringBitmapCodec { } } -impl heed::BytesDecode<'_> for BoRoaringBitmapCodec { +impl BytesDecode<'_> for BoRoaringBitmapCodec { type DItem = RoaringBitmap; fn bytes_decode(bytes: &[u8]) -> Option { @@ -28,6 +31,14 @@ impl heed::BytesDecode<'_> for BoRoaringBitmapCodec { } } +impl BytesDecodeOwned for BoRoaringBitmapCodec { + type DItem = RoaringBitmap; + + fn bytes_decode_owned(bytes: &[u8]) -> Option { + Self::bytes_decode(bytes) + } +} + impl heed::BytesEncode<'_> for BoRoaringBitmapCodec { type EItem = RoaringBitmap; diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index 01ce523ba..bf76287d8 100644 --- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -5,6 +5,8 @@ use std::mem::size_of; use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; use roaring::RoaringBitmap; +use crate::heed_codec::BytesDecodeOwned; + /// This is the limit where using a byteorder became less size efficient /// than using a direct roaring encoding, it is also the point where we are able /// to determine the encoding used only by using the array of bytes length. @@ -103,6 +105,14 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapCodec { } } +impl BytesDecodeOwned for CboRoaringBitmapCodec { + type DItem = RoaringBitmap; + + fn bytes_decode_owned(bytes: &[u8]) -> Option { + Self::deserialize_from(bytes).ok() + } +} + impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { type EItem = RoaringBitmap; diff --git a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs index 6cec0eb44..f982cc105 100644 --- a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs @@ -2,6 +2,8 @@ use std::borrow::Cow; use roaring::RoaringBitmap; +use crate::heed_codec::BytesDecodeOwned; + pub struct RoaringBitmapCodec; impl heed::BytesDecode<'_> for RoaringBitmapCodec { @@ -12,6 +14,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec { } } +impl BytesDecodeOwned for RoaringBitmapCodec { + type DItem = RoaringBitmap; + + fn bytes_decode_owned(bytes: &[u8]) -> Option { + RoaringBitmap::deserialize_from(bytes).ok() + } +} + impl heed::BytesEncode<'_> for RoaringBitmapCodec { type EItem = RoaringBitmap; diff --git a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs index e749680a0..8fae60df7 100644 --- a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs @@ -1,11 +1,23 @@ use std::mem; +use heed::BytesDecode; + +use crate::heed_codec::BytesDecodeOwned; + pub struct BoRoaringBitmapLenCodec; -impl heed::BytesDecode<'_> for BoRoaringBitmapLenCodec { +impl BytesDecode<'_> for BoRoaringBitmapLenCodec { type DItem = u64; fn bytes_decode(bytes: &[u8]) -> Option { Some((bytes.len() / mem::size_of::()) as u64) } } + +impl BytesDecodeOwned for BoRoaringBitmapLenCodec { + type DItem = u64; + + fn bytes_decode_owned(bytes: &[u8]) -> Option { + Self::bytes_decode(bytes) + } +} diff --git a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs index 4f728f1cd..5719a538a 100644 --- a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs @@ -1,11 +1,14 @@ use std::mem; +use heed::BytesDecode; + use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec}; use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD; +use crate::heed_codec::BytesDecodeOwned; pub struct CboRoaringBitmapLenCodec; -impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec { +impl BytesDecode<'_> for CboRoaringBitmapLenCodec { type DItem = u64; fn bytes_decode(bytes: &[u8]) -> Option { @@ -20,3 +23,11 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec { } } } + +impl BytesDecodeOwned for CboRoaringBitmapLenCodec { + type DItem = u64; + + fn bytes_decode_owned(bytes: &[u8]) -> Option { + Self::bytes_decode(bytes) + } +} diff --git a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs index 4d266e413..a9b0506ff 100644 --- a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs @@ -3,6 +3,8 @@ use std::mem; use byteorder::{LittleEndian, ReadBytesExt}; +use crate::heed_codec::BytesDecodeOwned; + const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; const SERIAL_COOKIE: u16 = 12347; @@ -59,6 +61,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapLenCodec { } } +impl BytesDecodeOwned for RoaringBitmapLenCodec { + type DItem = u64; + + fn bytes_decode_owned(bytes: &[u8]) -> Option { + RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() + } +} + #[cfg(test)] mod tests { use heed::BytesEncode; From 0ccf1e2e404832b3623842ae3d8368f54c9df923 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 13 Jun 2023 14:42:38 +0200 Subject: [PATCH 05/13] Allow the search cache to store owned values --- milli/examples/search.rs | 1 - milli/src/search/mod.rs | 6 +- milli/src/search/new/db_cache.rs | 187 +++++++++++++++++----------- milli/src/search/new/matches/mod.rs | 1 - milli/src/search/new/mod.rs | 12 +- milli/src/update/mod.rs | 3 +- 6 files changed, 130 insertions(+), 80 deletions(-) diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 829cb6244..87c9a004d 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -57,7 +57,6 @@ fn main() -> Result<(), Box> { false, &None, &None, - None, GeoSortStrategy::default(), 0, 20, diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 2f97143cc..baa391660 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -117,6 +117,11 @@ impl<'a> Search<'a> { pub fn execute(&self) -> Result { let mut ctx = SearchContext::new(self.index, self.rtxn); + + if let Some(searchable_attributes) = self.searchable_attributes { + ctx.searchable_attributes(searchable_attributes)?; + } + let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } = execute_search( &mut ctx, @@ -126,7 +131,6 @@ impl<'a> Search<'a> { self.exhaustive_number_hits, &self.filter, &self.sort_criteria, - self.searchable_attributes, self.geo_strategy, self.offset, self.limit, diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 90c604d72..2b2cd4d79 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -4,12 +4,13 @@ use std::hash::Hash; use fxhash::FxHashMap; use heed::types::ByteSlice; -use heed::{BytesDecode, BytesEncode, Database, RoTxn}; +use heed::{BytesEncode, Database, RoTxn}; use roaring::RoaringBitmap; use super::interner::Interned; use super::Word; -use crate::heed_codec::StrBEU16Codec; +use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec}; +use crate::update::MergeFn; use crate::{ CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext, }; @@ -22,48 +23,106 @@ use crate::{ #[derive(Default)] pub struct DatabaseCache<'ctx> { pub word_pair_proximity_docids: - FxHashMap<(u8, Interned, Interned), Option<&'ctx [u8]>>, + FxHashMap<(u8, Interned, Interned), Option>>, pub word_prefix_pair_proximity_docids: - FxHashMap<(u8, Interned, Interned), Option<&'ctx [u8]>>, + FxHashMap<(u8, Interned, Interned), Option>>, pub prefix_word_pair_proximity_docids: - FxHashMap<(u8, Interned, Interned), Option<&'ctx [u8]>>, - pub word_docids: FxHashMap, Option<&'ctx [u8]>>, - pub exact_word_docids: FxHashMap, Option<&'ctx [u8]>>, - pub word_prefix_docids: FxHashMap, Option<&'ctx [u8]>>, - pub exact_word_prefix_docids: FxHashMap, Option<&'ctx [u8]>>, + FxHashMap<(u8, Interned, Interned), Option>>, + pub word_docids: FxHashMap, Option>>, + pub exact_word_docids: FxHashMap, Option>>, + pub word_prefix_docids: FxHashMap, Option>>, + pub exact_word_prefix_docids: FxHashMap, Option>>, pub words_fst: Option>>, - pub word_position_docids: FxHashMap<(Interned, u16), Option<&'ctx [u8]>>, - pub word_prefix_position_docids: FxHashMap<(Interned, u16), Option<&'ctx [u8]>>, + pub word_position_docids: FxHashMap<(Interned, u16), Option>>, + pub word_prefix_position_docids: FxHashMap<(Interned, u16), Option>>, pub word_positions: FxHashMap, Vec>, pub word_prefix_positions: FxHashMap, Vec>, - pub word_fid_docids: FxHashMap<(Interned, u16), Option<&'ctx [u8]>>, - pub word_prefix_fid_docids: FxHashMap<(Interned, u16), Option<&'ctx [u8]>>, + pub word_fid_docids: FxHashMap<(Interned, u16), Option>>, + pub word_prefix_fid_docids: FxHashMap<(Interned, u16), Option>>, pub word_fids: FxHashMap, Vec>, pub word_prefix_fids: FxHashMap, Vec>, } impl<'ctx> DatabaseCache<'ctx> { - fn get_value<'v, K1, KC>( + fn get_value<'v, K1, KC, DC>( txn: &'ctx RoTxn, cache_key: K1, db_key: &'v KC::EItem, - cache: &mut FxHashMap>, + cache: &mut FxHashMap>>, db: Database, - ) -> Result> + ) -> Result> where K1: Copy + Eq + Hash, KC: BytesEncode<'v>, + DC: BytesDecodeOwned, { - let bitmap_ptr = match cache.entry(cache_key) { - Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), + match cache.entry(cache_key) { + Entry::Occupied(_) => {} Entry::Vacant(entry) => { - let bitmap_ptr = db.get(txn, db_key)?; + let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed); + entry.insert(bitmap_ptr); + } + } + + match cache.get(&cache_key).unwrap() { + Some(Cow::Borrowed(bytes)) => { + DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) + } + Some(Cow::Owned(bytes)) => { + DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) + } + None => Ok(None), + } + } + + fn get_value_from_keys<'v, K1, KC, DC>( + txn: &'ctx RoTxn, + cache_key: K1, + db_keys: &[&'v KC::EItem], + cache: &mut FxHashMap>>, + db: Database, + merger: MergeFn, + ) -> Result> + where + K1: Copy + Eq + Hash, + KC: BytesEncode<'v>, + DC: BytesDecodeOwned, + { + match cache.entry(cache_key) { + Entry::Occupied(_) => {} + Entry::Vacant(entry) => { + let bitmap_ptr: Option> = match db_keys { + [] => None, + [key] => db.get(txn, key)?.map(Cow::Borrowed), + keys => { + let bitmaps = keys + .into_iter() + .filter_map(|key| db.get(txn, key).transpose()) + .map(|v| v.map(Cow::Borrowed)) + .collect::>, _>>()?; + + if bitmaps.is_empty() { + None + } else { + Some(merger(&[], &bitmaps[..])?) + } + } + }; + entry.insert(bitmap_ptr); - bitmap_ptr } }; - Ok(bitmap_ptr) + + match cache.get(&cache_key).unwrap() { + Some(Cow::Borrowed(bytes)) => { + DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) + } + Some(Cow::Owned(bytes)) => { + DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) + } + None => Ok(None), + } } } impl<'ctx> SearchContext<'ctx> { @@ -99,30 +158,26 @@ impl<'ctx> SearchContext<'ctx> { /// Retrieve or insert the given value in the `word_docids` database. fn get_db_word_docids(&mut self, word: Interned) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( self.txn, word, self.word_interner.get(word).as_str(), &mut self.db_cache.word_docids, self.index.word_docids.remap_data_type::(), - )? - .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } fn get_db_exact_word_docids( &mut self, word: Interned, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( self.txn, word, self.word_interner.get(word).as_str(), &mut self.db_cache.exact_word_docids, self.index.exact_word_docids.remap_data_type::(), - )? - .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn word_prefix_docids(&mut self, prefix: Word) -> Result> { @@ -150,30 +205,26 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( self.txn, prefix, self.word_interner.get(prefix).as_str(), &mut self.db_cache.word_prefix_docids, self.index.word_prefix_docids.remap_data_type::(), - )? - .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } fn get_db_exact_word_prefix_docids( &mut self, prefix: Interned, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( self.txn, prefix, self.word_interner.get(prefix).as_str(), &mut self.db_cache.exact_word_prefix_docids, self.index.exact_word_prefix_docids.remap_data_type::(), - )? - .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_pair_proximity_docids( @@ -182,7 +233,7 @@ impl<'ctx> SearchContext<'ctx> { word2: Interned, proximity: u8, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (proximity, word1, word2), &( @@ -192,9 +243,7 @@ impl<'ctx> SearchContext<'ctx> { ), &mut self.db_cache.word_pair_proximity_docids, self.index.word_pair_proximity_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_pair_proximity_docids_len( @@ -203,7 +252,7 @@ impl<'ctx> SearchContext<'ctx> { word2: Interned, proximity: u8, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>( self.txn, (proximity, word1, word2), &( @@ -213,11 +262,7 @@ impl<'ctx> SearchContext<'ctx> { ), &mut self.db_cache.word_pair_proximity_docids, self.index.word_pair_proximity_docids.remap_data_type::(), - )? - .map(|bytes| { - CboRoaringBitmapLenCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()) - }) - .transpose() + ) } pub fn get_db_word_prefix_pair_proximity_docids( @@ -226,7 +271,7 @@ impl<'ctx> SearchContext<'ctx> { prefix2: Interned, proximity: u8, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (proximity, word1, prefix2), &( @@ -236,9 +281,7 @@ impl<'ctx> SearchContext<'ctx> { ), &mut self.db_cache.word_prefix_pair_proximity_docids, self.index.word_prefix_pair_proximity_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_prefix_word_pair_proximity_docids( &mut self, @@ -246,7 +289,7 @@ impl<'ctx> SearchContext<'ctx> { right: Interned, proximity: u8, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (proximity, left_prefix, right), &( @@ -256,9 +299,7 @@ impl<'ctx> SearchContext<'ctx> { ), &mut self.db_cache.prefix_word_pair_proximity_docids, self.index.prefix_word_pair_proximity_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_fid_docids( @@ -266,15 +307,13 @@ impl<'ctx> SearchContext<'ctx> { word: Interned, fid: u16, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word, fid), &(self.word_interner.get(word).as_str(), fid), &mut self.db_cache.word_fid_docids, self.index.word_fid_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_prefix_fid_docids( @@ -282,15 +321,13 @@ impl<'ctx> SearchContext<'ctx> { word_prefix: Interned, fid: u16, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word_prefix, fid), &(self.word_interner.get(word_prefix).as_str(), fid), &mut self.db_cache.word_prefix_fid_docids, self.index.word_prefix_fid_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_fids(&mut self, word: Interned) -> Result> { @@ -309,7 +346,7 @@ impl<'ctx> SearchContext<'ctx> { for result in remap_key_type { let ((_, fid), value) = result?; // filling other caches to avoid searching for them again - self.db_cache.word_fid_docids.insert((word, fid), Some(value)); + self.db_cache.word_fid_docids.insert((word, fid), Some(Cow::Borrowed(value))); fids.push(fid); } entry.insert(fids.clone()); @@ -335,7 +372,9 @@ impl<'ctx> SearchContext<'ctx> { for result in remap_key_type { let ((_, fid), value) = result?; // filling other caches to avoid searching for them again - self.db_cache.word_prefix_fid_docids.insert((word_prefix, fid), Some(value)); + self.db_cache + .word_prefix_fid_docids + .insert((word_prefix, fid), Some(Cow::Borrowed(value))); fids.push(fid); } entry.insert(fids.clone()); @@ -350,15 +389,13 @@ impl<'ctx> SearchContext<'ctx> { word: Interned, position: u16, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word, position), &(self.word_interner.get(word).as_str(), position), &mut self.db_cache.word_position_docids, self.index.word_position_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_prefix_position_docids( @@ -366,15 +403,13 @@ impl<'ctx> SearchContext<'ctx> { word_prefix: Interned, position: u16, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word_prefix, position), &(self.word_interner.get(word_prefix).as_str(), position), &mut self.db_cache.word_prefix_position_docids, self.index.word_prefix_position_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_positions(&mut self, word: Interned) -> Result> { @@ -393,7 +428,9 @@ impl<'ctx> SearchContext<'ctx> { for result in remap_key_type { let ((_, position), value) = result?; // filling other caches to avoid searching for them again - self.db_cache.word_position_docids.insert((word, position), Some(value)); + self.db_cache + .word_position_docids + .insert((word, position), Some(Cow::Borrowed(value))); positions.push(position); } entry.insert(positions.clone()); @@ -424,7 +461,7 @@ impl<'ctx> SearchContext<'ctx> { // filling other caches to avoid searching for them again self.db_cache .word_prefix_position_docids - .insert((word_prefix, position), Some(value)); + .insert((word_prefix, position), Some(Cow::Borrowed(value))); positions.push(position); } entry.insert(positions.clone()); diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index d70bb1550..f33d595e5 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -514,7 +514,6 @@ mod tests { false, &None, &None, - None, crate::search::new::GeoSortStrategy::default(), 0, 100, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index e72093bf8..6cc155c56 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -57,6 +57,7 @@ pub struct SearchContext<'ctx> { pub phrase_interner: DedupInterner, pub term_interner: Interner, pub phrase_docids: PhraseDocIdsCache, + pub restricted_fids: Option>, } impl<'ctx> SearchContext<'ctx> { @@ -69,8 +70,18 @@ impl<'ctx> SearchContext<'ctx> { phrase_interner: <_>::default(), term_interner: <_>::default(), phrase_docids: <_>::default(), + restricted_fids: None, } } + + pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { + let fids_map = self.index.fields_ids_map(&self.txn)?; + let restricted_fids = + searchable_attributes.iter().filter_map(|name| fids_map.id(name)).collect(); + self.restricted_fids = Some(restricted_fids); + + Ok(()) + } } #[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)] @@ -355,7 +366,6 @@ pub fn execute_search( exhaustive_number_hits: bool, filters: &Option, sort_criteria: &Option>, - searchable_attributes: Option<&[String]>, geo_strategy: geo_sort::Strategy, from: usize, length: usize, diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs index 7a3fd1fd9..011a2eb60 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -4,7 +4,8 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele pub use self::facet::bulk::FacetsUpdateBulk; pub use self::facet::incremental::FacetsUpdateIncrementalInner; pub use self::index_documents::{ - DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, + merge_roaring_bitmaps, DocumentAdditionResult, DocumentId, IndexDocuments, + IndexDocumentsConfig, IndexDocumentsMethod, MergeFn, }; pub use self::indexer_config::IndexerConfig; pub use self::prefix_word_pairs::{ From fb8fa071694fbca8625675128eff5e7b89d5399b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 13 Jun 2023 17:37:35 +0200 Subject: [PATCH 06/13] Restrict field ids in search context --- milli/src/search/new/db_cache.rs | 74 +++++++++++++++++++++++++------- milli/src/update/mod.rs | 4 +- 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 2b2cd4d79..6c1b0f7f8 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -10,7 +10,7 @@ use roaring::RoaringBitmap; use super::interner::Interned; use super::Word; use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec}; -use crate::update::MergeFn; +use crate::update::{merge_cbo_roaring_bitmaps, MergeFn}; use crate::{ CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext, }; @@ -79,7 +79,7 @@ impl<'ctx> DatabaseCache<'ctx> { fn get_value_from_keys<'v, K1, KC, DC>( txn: &'ctx RoTxn, cache_key: K1, - db_keys: &[&'v KC::EItem], + db_keys: &'v [KC::EItem], cache: &mut FxHashMap>>, db: Database, merger: MergeFn, @@ -88,6 +88,7 @@ impl<'ctx> DatabaseCache<'ctx> { K1: Copy + Eq + Hash, KC: BytesEncode<'v>, DC: BytesDecodeOwned, + KC::EItem: Sized, { match cache.entry(cache_key) { Entry::Occupied(_) => {} @@ -125,6 +126,7 @@ impl<'ctx> DatabaseCache<'ctx> { } } } + impl<'ctx> SearchContext<'ctx> { pub fn get_words_fst(&mut self) -> Result>> { if let Some(fst) = self.db_cache.words_fst.clone() { @@ -158,13 +160,28 @@ impl<'ctx> SearchContext<'ctx> { /// Retrieve or insert the given value in the `word_docids` database. fn get_db_word_docids(&mut self, word: Interned) -> Result> { - DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( - self.txn, - word, - self.word_interner.get(word).as_str(), - &mut self.db_cache.word_docids, - self.index.word_docids.remap_data_type::(), - ) + match &self.restricted_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(word).as_str(); + let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + &keys[..], + &mut self.db_cache.word_docids, + self.index.word_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( + self.txn, + word, + self.word_interner.get(word).as_str(), + &mut self.db_cache.word_docids, + self.index.word_docids.remap_data_type::(), + ), + } } fn get_db_exact_word_docids( @@ -205,13 +222,28 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( - self.txn, - prefix, - self.word_interner.get(prefix).as_str(), - &mut self.db_cache.word_prefix_docids, - self.index.word_prefix_docids.remap_data_type::(), - ) + match &self.restricted_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(prefix).as_str(); + let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + &keys[..], + &mut self.db_cache.word_prefix_docids, + self.index.word_prefix_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( + self.txn, + prefix, + self.word_interner.get(prefix).as_str(), + &mut self.db_cache.word_prefix_docids, + self.index.word_prefix_docids.remap_data_type::(), + ), + } } fn get_db_exact_word_prefix_docids( @@ -307,6 +339,11 @@ impl<'ctx> SearchContext<'ctx> { word: Interned, fid: u16, ) -> Result> { + // if the requested fid isn't in the restricted list, return None. + if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { + return Ok(None); + } + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word, fid), @@ -321,6 +358,11 @@ impl<'ctx> SearchContext<'ctx> { word_prefix: Interned, fid: u16, ) -> Result> { + // if the requested fid isn't in the restricted list, return None. + if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { + return Ok(None); + } + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word_prefix, fid), diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs index 011a2eb60..32584825b 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -4,8 +4,8 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele pub use self::facet::bulk::FacetsUpdateBulk; pub use self::facet::incremental::FacetsUpdateIncrementalInner; pub use self::index_documents::{ - merge_roaring_bitmaps, DocumentAdditionResult, DocumentId, IndexDocuments, - IndexDocumentsConfig, IndexDocumentsMethod, MergeFn, + merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId, + IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn, }; pub use self::indexer_config::IndexerConfig; pub use self::prefix_word_pairs::{ From 993b0d012c0a932c4407e124cd4d58a58b91b512 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 13 Jun 2023 17:41:36 +0200 Subject: [PATCH 07/13] Remove proximity_ranking_rule_order test, fixing this test would force us to create a fid_word_pair_proximity_docids and a fid_word_prefix_pair_proximity_docids databases which may multiply the keys of word_pair_proximity_docids and word_prefix_pair_proximity_docids by the number of attributes in the searchable_attributes list --- .../tests/search/restrict_searchable.rs | 34 ------------------- 1 file changed, 34 deletions(-) diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs index dda38560a..7c3b1a546 100644 --- a/meilisearch/tests/search/restrict_searchable.rs +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -192,40 +192,6 @@ async fn typo_ranking_rule_order() { .await; } -#[actix_rt::test] -async fn proximity_ranking_rule_order() { - let server = Server::new().await; - let index = index_with_documents( - &server, - &json!([ - { - "title": "Captain super mega cool. A Marvel story", - "desc": "Captain Marvel", - "id": "1", - }, - { - "title": "Captain America from Marvel", - "desc": "a Shazam ersatz", - "id": "2", - }]), - ) - .await; - - // Document 2 should appear before document 1. - index - .search(json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"], - json!([ - {"id": "2"}, - {"id": "1"}, - ]) - ); - }) - .await; -} - #[actix_rt::test] async fn attributes_ranking_rule_order() { let server = Server::new().await; From 42709ea9a5eb46431cc09db1e192d66f60572357 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 13 Jun 2023 18:52:02 +0200 Subject: [PATCH 08/13] Fix clippy warnings --- milli/src/search/new/db_cache.rs | 2 +- milli/src/search/new/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 6c1b0f7f8..e6b3b96bc 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -98,7 +98,7 @@ impl<'ctx> DatabaseCache<'ctx> { [key] => db.get(txn, key)?.map(Cow::Borrowed), keys => { let bitmaps = keys - .into_iter() + .iter() .filter_map(|key| db.get(txn, key).transpose()) .map(|v| v.map(Cow::Borrowed)) .collect::>, _>>()?; diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 6cc155c56..015940945 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -75,7 +75,7 @@ impl<'ctx> SearchContext<'ctx> { } pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { - let fids_map = self.index.fields_ids_map(&self.txn)?; + let fids_map = self.index.fields_ids_map(self.txn)?; let restricted_fids = searchable_attributes.iter().filter_map(|name| fids_map.id(name)).collect(); self.restricted_fids = Some(restricted_fids); From 114f878205769c216690bd88ef7e78ddb4d516ca Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 20 Jun 2023 10:00:35 +0200 Subject: [PATCH 09/13] Rename restrictSearchableAttributes into attributesToSearchOn --- meilisearch-types/src/error.rs | 2 +- meilisearch/src/routes/indexes/search.rs | 8 ++-- meilisearch/src/search.rs | 14 +++---- .../tests/search/restrict_searchable.rs | 38 ++++++++----------- 4 files changed, 27 insertions(+), 35 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index cb6f78c94..8ce7102cb 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -224,7 +224,7 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; -InvalidRestrictSearchableAttributes , InvalidRequest , BAD_REQUEST ; +InvalidAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index db0aedda5..06daa1e5a 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -70,8 +70,8 @@ pub struct SearchQueryGet { crop_marker: String, #[deserr(default, error = DeserrQueryParamError)] matching_strategy: MatchingStrategy, - #[deserr(default, error = DeserrQueryParamError)] - pub restrict_searchable_attributes: Option>, + #[deserr(default, error = DeserrQueryParamError)] + pub attributes_to_search_on: Option>, } impl From for SearchQuery { @@ -104,9 +104,7 @@ impl From for SearchQuery { highlight_post_tag: other.highlight_post_tag, crop_marker: other.crop_marker, matching_strategy: other.matching_strategy, - restrict_searchable_attributes: other - .restrict_searchable_attributes - .map(|o| o.into_iter().collect()), + attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), } } } diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 3a91ea655..ce96ec748 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -73,8 +73,8 @@ pub struct SearchQuery { pub crop_marker: String, #[deserr(default, error = DeserrJsonError, default)] pub matching_strategy: MatchingStrategy, - #[deserr(default, error = DeserrJsonError, default)] - pub restrict_searchable_attributes: Option>, + #[deserr(default, error = DeserrJsonError, default)] + pub attributes_to_search_on: Option>, } impl SearchQuery { @@ -130,8 +130,8 @@ pub struct SearchQueryWithIndex { pub crop_marker: String, #[deserr(default, error = DeserrJsonError, default)] pub matching_strategy: MatchingStrategy, - #[deserr(default, error = DeserrJsonError, default)] - pub restrict_searchable_attributes: Option>, + #[deserr(default, error = DeserrJsonError, default)] + pub attributes_to_search_on: Option>, } impl SearchQueryWithIndex { @@ -157,7 +157,7 @@ impl SearchQueryWithIndex { highlight_post_tag, crop_marker, matching_strategy, - restrict_searchable_attributes, + attributes_to_search_on, } = self; ( index_uid, @@ -181,7 +181,7 @@ impl SearchQueryWithIndex { highlight_post_tag, crop_marker, matching_strategy, - restrict_searchable_attributes, + attributes_to_search_on, // do not use ..Default::default() here, // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` }, @@ -297,7 +297,7 @@ pub fn perform_search( search.query(query); } - if let Some(ref searchable) = query.restrict_searchable_attributes { + if let Some(ref searchable) = query.attributes_to_search_on { search.searchable_attributes(searchable); } diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs index 7c3b1a546..79f7f9733 100644 --- a/meilisearch/tests/search/restrict_searchable.rs +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -39,7 +39,7 @@ async fn simple_search_on_title() { // simple search should return 2 documents (ids: 2 and 3). index .search( - json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"]}), + json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"]}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!(response["hits"].as_array().unwrap().len(), 2); @@ -55,13 +55,10 @@ async fn simple_prefix_search_on_title() { // simple search should return 2 documents (ids: 2 and 3). index - .search( - json!({"q": "Captain Mar", "restrictSearchableAttributes": ["title"]}), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 2); - }, - ) + .search(json!({"q": "Captain Mar", "attributesToSearchOn": ["title"]}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 2); + }) .await; } @@ -71,7 +68,7 @@ async fn simple_search_on_title_matching_strategy_all() { let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; // simple search matching strategy all should only return 1 document (ids: 2). index - .search(json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "matchingStrategy": "all"}), |response, code| { + .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "matchingStrategy": "all"}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!(response["hits"].as_array().unwrap().len(), 1); }) @@ -85,7 +82,7 @@ async fn simple_search_on_unknown_field() { // simple search on unknown field shouldn't return any document. index .search( - json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["unknown"]}), + json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!(response["hits"].as_array().unwrap().len(), 0); @@ -100,13 +97,10 @@ async fn simple_search_on_no_field() { let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; // simple search on no field shouldn't return any document. index - .search( - json!({"q": "Captain Marvel", "restrictSearchableAttributes": []}), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 0); - }, - ) + .search(json!({"q": "Captain Marvel", "attributesToSearchOn": []}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 0); + }) .await; } @@ -118,7 +112,7 @@ async fn word_ranking_rule_order() { // Document 3 should appear before document 2. index .search( - json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "attributesToRetrieve": ["id"]}), + json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( @@ -143,7 +137,7 @@ async fn word_ranking_rule_order_exact_words() { // simple search should return 2 documents (ids: 2 and 3). index .search( - json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "attributesToRetrieve": ["id"]}), + json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( @@ -179,7 +173,7 @@ async fn typo_ranking_rule_order() { // Document 2 should appear before document 1. index - .search(json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { + .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"], @@ -215,7 +209,7 @@ async fn attributes_ranking_rule_order() { // Document 2 should appear before document 1. index - .search(json!({"q": "Captain Marvel", "restrictSearchableAttributes": ["desc", "footer"], "attributesToRetrieve": ["id"]}), |response, code| { + .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["desc", "footer"], "attributesToRetrieve": ["id"]}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"], @@ -249,7 +243,7 @@ async fn exactness_ranking_rule_order() { // Document 2 should appear before document 1. index - .search(json!({"q": "Captain Marvel", "attributesToRetrieve": ["id"], "restrictSearchableAttributes": ["desc"]}), |response, code| { + .search(json!({"q": "Captain Marvel", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["desc"]}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"], From dc391deca0f319122a4abf168ab4ebd395292b04 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 20 Jun 2023 16:57:50 +0200 Subject: [PATCH 10/13] Reverse assert comparison to have a consistent error message --- .../tests/search/restrict_searchable.rs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs index 79f7f9733..8805d097d 100644 --- a/meilisearch/tests/search/restrict_searchable.rs +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -41,7 +41,7 @@ async fn simple_search_on_title() { .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"]}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!(response["hits"].as_array().unwrap().len(), 2); }, ) @@ -56,7 +56,7 @@ async fn simple_prefix_search_on_title() { // simple search should return 2 documents (ids: 2 and 3). index .search(json!({"q": "Captain Mar", "attributesToSearchOn": ["title"]}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!(response["hits"].as_array().unwrap().len(), 2); }) .await; @@ -69,7 +69,7 @@ async fn simple_search_on_title_matching_strategy_all() { // simple search matching strategy all should only return 1 document (ids: 2). index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "matchingStrategy": "all"}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!(response["hits"].as_array().unwrap().len(), 1); }) .await; @@ -84,7 +84,7 @@ async fn simple_search_on_unknown_field() { .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!(response["hits"].as_array().unwrap().len(), 0); }, ) @@ -98,7 +98,7 @@ async fn simple_search_on_no_field() { // simple search on no field shouldn't return any document. index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": []}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!(response["hits"].as_array().unwrap().len(), 0); }) .await; @@ -114,7 +114,7 @@ async fn word_ranking_rule_order() { .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!( response["hits"], json!([ @@ -139,7 +139,7 @@ async fn word_ranking_rule_order_exact_words() { .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!( response["hits"], json!([ @@ -174,7 +174,7 @@ async fn typo_ranking_rule_order() { // Document 2 should appear before document 1. index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!( response["hits"], json!([ @@ -210,7 +210,7 @@ async fn attributes_ranking_rule_order() { // Document 2 should appear before document 1. index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["desc", "footer"], "attributesToRetrieve": ["id"]}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!( response["hits"], json!([ @@ -244,7 +244,7 @@ async fn exactness_ranking_rule_order() { // Document 2 should appear before document 1. index .search(json!({"q": "Captain Marvel", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["desc"]}), |response, code| { - assert_eq!(code, 200, "{}", response); + assert_eq!(200, code, "{}", response); assert_eq!( response["hits"], json!([ From 59f64a5256b8f56347385da90638181668919956 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 20 Jun 2023 17:04:59 +0200 Subject: [PATCH 11/13] Return an error when an attribute is not searchable --- meilisearch-types/src/error.rs | 3 + meilisearch/tests/search/errors.rs | 24 ++++++++ .../tests/search/restrict_searchable.rs | 16 ------ milli/src/error.rs | 10 ++++ milli/src/search/new/mod.rs | 55 ++++++++++++++++++- 5 files changed, 89 insertions(+), 19 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 8ce7102cb..8bfd15810 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -333,6 +333,9 @@ impl ErrorCode for milli::Error { UserError::SortRankingRuleMissing => Code::InvalidSearchSort, UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets, UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, + UserError::InvalidSearchableAttribute { .. } => { + Code::InvalidAttributesToSearchOn + } UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::SortError(_) => Code::InvalidSearchSort, diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index f314e8800..72c2c5b90 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -963,3 +963,27 @@ async fn sort_unset_ranking_rule() { ) .await; } + +#[actix_rt::test] +async fn search_on_unknown_field() { + let server = Server::new().await; + let index = server.index("test"); + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + index + .search( + json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}), + |response, code| { + assert_eq!(400, code, "{}", response); + assert_eq!(response, json!({ + "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", + "code": "invalid_attributes_to_search_on", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_attributes_to_search_on" + })); + }, + ) + .await; +} diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs index 8805d097d..9379b6558 100644 --- a/meilisearch/tests/search/restrict_searchable.rs +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -75,22 +75,6 @@ async fn simple_search_on_title_matching_strategy_all() { .await; } -#[actix_rt::test] -async fn simple_search_on_unknown_field() { - let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; - // simple search on unknown field shouldn't return any document. - index - .search( - json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}), - |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 0); - }, - ) - .await; -} - #[actix_rt::test] async fn simple_search_on_no_field() { let server = Server::new().await; diff --git a/milli/src/error.rs b/milli/src/error.rs index 8d55eabbd..a3c843254 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -124,6 +124,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco } )] InvalidSortableAttribute { field: String, valid_fields: BTreeSet }, + #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.", + .field, + .valid_fields.iter().map(AsRef::as_ref).collect::>().join(", "), + .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""), + )] + InvalidSearchableAttribute { + field: String, + valid_fields: BTreeSet, + hidden_fields: bool, + }, #[error("{}", HeedError::BadOpenOptions)] InvalidLmdbOpenOptions, #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")] diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 015940945..3accd8528 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -20,7 +20,7 @@ mod sort; #[cfg(test)] mod tests; -use std::collections::HashSet; +use std::collections::{BTreeSet, HashSet}; use bucket_sort::{bucket_sort, BucketSortOutput}; use charabia::TokenizerBuilder; @@ -44,6 +44,7 @@ use self::geo_sort::GeoSort; pub use self::geo_sort::Strategy as GeoSortStrategy; use self::graph_based_ranking_rule::Words; use self::interner::Interned; +use crate::error::FieldIdMapMissingEntry; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; @@ -76,8 +77,56 @@ impl<'ctx> SearchContext<'ctx> { pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { let fids_map = self.index.fields_ids_map(self.txn)?; - let restricted_fids = - searchable_attributes.iter().filter_map(|name| fids_map.id(name)).collect(); + let searchable_names = self.index.searchable_fields(self.txn)?; + + let mut restricted_fids = Vec::new(); + for field_name in searchable_attributes { + let searchable_contains_name = + searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name)); + let fid = match (fids_map.id(field_name), searchable_contains_name) { + // The Field id exist and the field is searchable + (Some(fid), Some(true)) | (Some(fid), None) => fid, + // The field is searchable but the Field id doesn't exist => Internal Error + (None, Some(true)) => { + return Err(FieldIdMapMissingEntry::FieldName { + field_name: field_name.to_string(), + process: "search", + } + .into()) + } + // The field is not searchable => User error + _otherwise => { + let mut valid_fields: BTreeSet<_> = + fids_map.names().map(String::from).collect(); + + // Filter by the searchable names + if let Some(sn) = searchable_names { + let searchable_names = sn.iter().map(|s| s.to_string()).collect(); + valid_fields = &valid_fields & &searchable_names; + } + + let searchable_count = valid_fields.len(); + + // Remove hidden fields + if let Some(dn) = self.index.displayed_fields(self.txn)? { + let displayable_names = dn.iter().map(|s| s.to_string()).collect(); + valid_fields = &valid_fields & &displayable_names; + } + + let hidden_fields = searchable_count > valid_fields.len(); + let field = field_name.to_string(); + return Err(UserError::InvalidSearchableAttribute { + field, + valid_fields, + hidden_fields, + } + .into()); + } + }; + + restricted_fids.push(fid); + } + self.restricted_fids = Some(restricted_fids); Ok(()) From 63ca25290b462c32ad542f659914f90835cd7ff7 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 20 Jun 2023 18:25:10 +0200 Subject: [PATCH 12/13] Take into account small Review requests --- milli/src/search/new/db_cache.rs | 48 ++++++++++++++------------------ 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index e6b3b96bc..e0a2ba3cf 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -57,12 +57,9 @@ impl<'ctx> DatabaseCache<'ctx> { KC: BytesEncode<'v>, DC: BytesDecodeOwned, { - match cache.entry(cache_key) { - Entry::Occupied(_) => {} - Entry::Vacant(entry) => { - let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed); - entry.insert(bitmap_ptr); - } + if let Entry::Vacant(entry) = cache.entry(cache_key) { + let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed); + entry.insert(bitmap_ptr); } match cache.get(&cache_key).unwrap() { @@ -90,30 +87,27 @@ impl<'ctx> DatabaseCache<'ctx> { DC: BytesDecodeOwned, KC::EItem: Sized, { - match cache.entry(cache_key) { - Entry::Occupied(_) => {} - Entry::Vacant(entry) => { - let bitmap_ptr: Option> = match db_keys { - [] => None, - [key] => db.get(txn, key)?.map(Cow::Borrowed), - keys => { - let bitmaps = keys - .iter() - .filter_map(|key| db.get(txn, key).transpose()) - .map(|v| v.map(Cow::Borrowed)) - .collect::>, _>>()?; + if let Entry::Vacant(entry) = cache.entry(cache_key) { + let bitmap_ptr: Option> = match db_keys { + [] => None, + [key] => db.get(txn, key)?.map(Cow::Borrowed), + keys => { + let bitmaps = keys + .iter() + .filter_map(|key| db.get(txn, key).transpose()) + .map(|v| v.map(Cow::Borrowed)) + .collect::>, _>>()?; - if bitmaps.is_empty() { - None - } else { - Some(merger(&[], &bitmaps[..])?) - } + if bitmaps.is_empty() { + None + } else { + Some(merger(&[], &bitmaps[..])?) } - }; + } + }; - entry.insert(bitmap_ptr); - } - }; + entry.insert(bitmap_ptr); + } match cache.get(&cache_key).unwrap() { Some(Cow::Borrowed(bytes)) => { From 9d2a12821d098accf04c82ca943d43f3710943fe Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 22 Jun 2023 14:42:52 +0200 Subject: [PATCH 13/13] Use insta snapshot --- meilisearch/tests/common/index.rs | 27 +++-- meilisearch/tests/search/errors.rs | 16 +-- .../tests/search/restrict_searchable.rs | 112 +++++++++++------- 3 files changed, 95 insertions(+), 60 deletions(-) diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index beab9970b..517024c74 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -346,17 +346,24 @@ impl Index<'_> { query: Value, test: impl Fn(Value, StatusCode) + UnwindSafe + Clone, ) { - let (response, code) = self.search_post(query.clone()).await; - let t = test.clone(); - if let Err(e) = catch_unwind(move || t(response, code)) { - eprintln!("Error with post search"); - resume_unwind(e); - } + let post = self.search_post(query.clone()).await; + let query = yaup::to_string(&query).unwrap(); - let (response, code) = self.search_get(&query).await; - if let Err(e) = catch_unwind(move || test(response, code)) { - eprintln!("Error with get search"); - resume_unwind(e); + let get = self.search_get(&query).await; + + insta::allow_duplicates! { + let (response, code) = post; + let t = test.clone(); + if let Err(e) = catch_unwind(move || t(response, code)) { + eprintln!("Error with post search"); + resume_unwind(e); + } + + let (response, code) = get; + if let Err(e) = catch_unwind(move || test(response, code)) { + eprintln!("Error with get search"); + resume_unwind(e); + } } } diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index 72c2c5b90..273472d46 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -976,13 +976,15 @@ async fn search_on_unknown_field() { .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}), |response, code| { - assert_eq!(400, code, "{}", response); - assert_eq!(response, json!({ - "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", - "code": "invalid_attributes_to_search_on", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_attributes_to_search_on" - })); + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", + "code": "invalid_attributes_to_search_on", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_attributes_to_search_on" + } + "###); }, ) .await; diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs index 9379b6558..f119acea5 100644 --- a/meilisearch/tests/search/restrict_searchable.rs +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -1,3 +1,4 @@ +use meili_snap::{json_string, snapshot}; use once_cell::sync::Lazy; use serde_json::{json, Value}; @@ -41,8 +42,8 @@ async fn simple_search_on_title() { .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"]}), |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 2); + snapshot!(code, @"200 OK"); + snapshot!(response["hits"].as_array().unwrap().len(), @"2"); }, ) .await; @@ -56,8 +57,8 @@ async fn simple_prefix_search_on_title() { // simple search should return 2 documents (ids: 2 and 3). index .search(json!({"q": "Captain Mar", "attributesToSearchOn": ["title"]}), |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 2); + snapshot!(code, @"200 OK"); + snapshot!(response["hits"].as_array().unwrap().len(), @"2"); }) .await; } @@ -69,8 +70,8 @@ async fn simple_search_on_title_matching_strategy_all() { // simple search matching strategy all should only return 1 document (ids: 2). index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "matchingStrategy": "all"}), |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 1); + snapshot!(code, @"200 OK"); + snapshot!(response["hits"].as_array().unwrap().len(), @"1"); }) .await; } @@ -82,8 +83,8 @@ async fn simple_search_on_no_field() { // simple search on no field shouldn't return any document. index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": []}), |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 0); + snapshot!(code, @"200 OK"); + snapshot!(response["hits"].as_array().unwrap().len(), @"0"); }) .await; } @@ -98,13 +99,18 @@ async fn word_ranking_rule_order() { .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!( - response["hits"], - json!([ - {"id": "3"}, - {"id": "2"}, - ]) + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), + @r###" + [ + { + "id": "3" + }, + { + "id": "2" + } + ] + "### ); }, ) @@ -123,13 +129,18 @@ async fn word_ranking_rule_order_exact_words() { .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!( - response["hits"], - json!([ - {"id": "3"}, - {"id": "2"}, - ]) + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), + @r###" + [ + { + "id": "3" + }, + { + "id": "2" + } + ] + "### ); }, ) @@ -158,13 +169,18 @@ async fn typo_ranking_rule_order() { // Document 2 should appear before document 1. index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!( - response["hits"], - json!([ - {"id": "2"}, - {"id": "1"}, - ]) + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), + @r###" + [ + { + "id": "2" + }, + { + "id": "1" + } + ] + "### ); }) .await; @@ -194,13 +210,18 @@ async fn attributes_ranking_rule_order() { // Document 2 should appear before document 1. index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["desc", "footer"], "attributesToRetrieve": ["id"]}), |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!( - response["hits"], - json!([ - {"id": "2"}, - {"id": "1"}, - ]) + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), + @r###" + [ + { + "id": "2" + }, + { + "id": "1" + } + ] + "### ); }) .await; @@ -228,13 +249,18 @@ async fn exactness_ranking_rule_order() { // Document 2 should appear before document 1. index .search(json!({"q": "Captain Marvel", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["desc"]}), |response, code| { - assert_eq!(200, code, "{}", response); - assert_eq!( - response["hits"], - json!([ - {"id": "2"}, - {"id": "1"}, - ]) + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), + @r###" + [ + { + "id": "2" + }, + { + "id": "1" + } + ] + "### ); }) .await;