From 15a4c05379ee473f6b7a8c1447596307e59e63b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 2 May 2023 09:34:28 +0200 Subject: [PATCH 01/38] Store the facet string values in multiple FSTs --- milli/src/heed_codec/fst_set_codec.rs | 23 +++++++++++++++ milli/src/heed_codec/mod.rs | 2 ++ milli/src/index.rs | 7 ++++- milli/src/update/clear_documents.rs | 1 + milli/src/update/delete_documents.rs | 1 + milli/src/update/facet/mod.rs | 42 +++++++++++++++++++++++++-- 6 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 milli/src/heed_codec/fst_set_codec.rs diff --git a/milli/src/heed_codec/fst_set_codec.rs b/milli/src/heed_codec/fst_set_codec.rs new file mode 100644 index 000000000..a8d35ebda --- /dev/null +++ b/milli/src/heed_codec/fst_set_codec.rs @@ -0,0 +1,23 @@ +use fst::Set; +use std::borrow::Cow; + +use heed::{BytesDecode, BytesEncode}; + +/// A codec for values of type `Set<&[u8]>`. +pub struct FstSetCodec; + +impl<'a> BytesEncode<'a> for FstSetCodec { + type EItem = Set>; + + fn bytes_encode(item: &'a Self::EItem) -> Option> { + Some(Cow::Borrowed(item.as_fst().as_bytes())) + } +} + +impl<'a> BytesDecode<'a> for FstSetCodec { + type DItem = Set<&'a [u8]>; + + fn bytes_decode(bytes: &'a [u8]) -> Option { + Some(Set::new(bytes).ok()?) + } +} diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs index c54168a36..666f68e28 100644 --- a/milli/src/heed_codec/mod.rs +++ b/milli/src/heed_codec/mod.rs @@ -2,6 +2,7 @@ mod beu32_str_codec; mod byte_slice_ref; pub mod facet; mod field_id_word_count_codec; +mod fst_set_codec; mod obkv_codec; mod roaring_bitmap; mod roaring_bitmap_length; @@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec; pub use self::beu32_str_codec::BEU32StrCodec; pub use self::field_id_word_count_codec::FieldIdWordCountCodec; +pub use self::fst_set_codec::FstSetCodec; pub use self::obkv_codec::ObkvCodec; pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec}; pub use self::roaring_bitmap_length::{ diff --git a/milli/src/index.rs b/milli/src/index.rs index a22901993..5c32f75f5 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,7 +21,7 @@ use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec, OrderedF64Codec, }; -use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec}; +use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec}; use crate::readable_slices::ReadableSlices; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, @@ -94,6 +94,7 @@ pub mod db_name { pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids"; pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids"; pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; + pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst"; pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; pub const VECTOR_ID_DOCID: &str = "vector-id-docids"; @@ -154,6 +155,8 @@ pub struct Index { pub facet_id_f64_docids: Database, FacetGroupValueCodec>, /// Maps the facet field id and ranges of strings with the docids that corresponds to them. pub facet_id_string_docids: Database, FacetGroupValueCodec>, + /// Maps the facet field id of the string facets with an FST containing all the facets values. + pub facet_id_string_fst: Database, FstSetCodec>, /// Maps the document id, the facet field id and the numbers. pub field_id_docid_facet_f64s: Database, @@ -206,6 +209,7 @@ impl Index { let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?; let facet_id_string_docids = env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?; + let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?; let facet_id_exists_docids = env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?; let facet_id_is_null_docids = @@ -240,6 +244,7 @@ impl Index { field_id_word_count_docids, facet_id_f64_docids, facet_id_string_docids, + facet_id_string_fst, facet_id_exists_docids, facet_id_is_null_docids, facet_id_is_empty_docids, diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index f4a2d43fe..37c0f32b2 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -34,6 +34,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { script_language_docids, facet_id_f64_docids, facet_id_string_docids, + facet_id_string_fst: _, facet_id_exists_docids, facet_id_is_null_docids, facet_id_is_empty_docids, diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 766f0e16e..c9124e591 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -237,6 +237,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { word_prefix_fid_docids, facet_id_f64_docids: _, facet_id_string_docids: _, + facet_id_string_fst: _, field_id_docid_facet_f64s: _, field_id_docid_facet_strings: _, script_language_docids, diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 2fd748d4d..4073ab8e5 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -78,15 +78,16 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5; use std::fs::File; +use heed::types::DecodeIgnore; use log::debug; use time::OffsetDateTime; use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; use crate::facet::FacetType; -use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec}; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::ByteSliceRefCodec; -use crate::{Index, Result}; +use crate::{Index, Result, BEU16}; pub mod bulk; pub mod delete; @@ -157,6 +158,43 @@ impl<'i> FacetsUpdate<'i> { ); incremental_update.execute(wtxn)?; } + + // We compute one FST by string facet + let mut text_fsts = vec![]; + let mut current_fst: Option<(u16, fst::SetBuilder>)> = None; + let database = self.index.facet_id_string_docids.remap_data_type::(); + for result in database.iter(&wtxn)? { + let (facet_group_key, _) = result?; + if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key { + current_fst = match current_fst.take() { + Some((fid, fst_builder)) if fid != field_id => { + let fst = fst_builder.into_set(); + text_fsts.push((field_id, fst)); + Some((field_id, fst::SetBuilder::memory())) + } + Some((field_id, fst_builder)) => Some((field_id, fst_builder)), + None => Some((field_id, fst::SetBuilder::memory())), + }; + + if let Some((_, fst_builder)) = current_fst.as_mut() { + fst_builder.insert(left_bound)?; + } + } + } + + if let Some((field_id, fst_builder)) = current_fst { + let fst = fst_builder.into_set(); + text_fsts.push((field_id, fst)); + } + + // We remove all of the previous FSTs that were in this database + self.index.facet_id_string_fst.clear(wtxn)?; + + // We write those FSTs in LMDB now + for (field_id, fst) in text_fsts { + self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?; + } + Ok(()) } } From c34de05106adeb75a84cec442d4cdb6eac49aa53 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 13 Apr 2023 17:38:20 +0200 Subject: [PATCH 02/38] Introduce the SearchForFacetValue struct --- milli/src/lib.rs | 5 +- milli/src/search/mod.rs | 103 ++++++++++++++++++++++++++++++++++++- milli/src/update/facets.rs | 1 - 3 files changed, 105 insertions(+), 4 deletions(-) delete mode 100644 milli/src/update/facets.rs diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 99126f60e..6f14977ec 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -57,8 +57,9 @@ pub use self::heed_codec::{ }; pub use self::index::Index; pub use self::search::{ - FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search, - SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, + FacetDistribution, FacetSearchResult, Filter, FormatOptions, MatchBounds, MatcherBuilder, + MatchingWords, Search, SearchForFacetValue, SearchResult, TermsMatchingStrategy, + DEFAULT_VALUES_PER_FACET, }; pub type Result = std::result::Result; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index dc25c0f23..1e648d241 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,5 +1,7 @@ use std::fmt; +use fst::automaton::{Complement, Intersection, StartsWith, Str, Union}; +use fst::Streamer; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; @@ -7,9 +9,11 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords}; use self::new::PartialSearchResult; +use crate::error::UserError; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ - execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext, + execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext, BEU16, }; // Building these factories is not free. @@ -234,6 +238,103 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { } } +pub struct SearchForFacetValue<'a> { + query: Option, + facet: String, + search_query: Search<'a>, +} + +impl<'a> SearchForFacetValue<'a> { + fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValue<'a> { + SearchForFacetValue { query: None, facet, search_query } + } + + fn query(&mut self, query: impl Into) -> &mut Self { + self.query = Some(query.into()); + self + } + + fn execute(&self) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + + let sortable_fields = index.sortable_fields(rtxn)?; + if !sortable_fields.contains(&self.facet) { + // TODO create a new type of error + return Err(UserError::InvalidSortableAttribute { + field: self.facet.clone(), + valid_fields: sortable_fields.into_iter().collect(), + })?; + } + + let fields_ids_map = index.fields_ids_map(rtxn)?; + let (field_id, fst) = match fields_ids_map.id(&self.facet) { + Some(fid) => { + match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { + Some(fst) => (fid, fst), + None => todo!("return an error, is the user trying to search in numbers?"), + } + } + None => todo!("return an internal error bug"), + }; + + let search_candidates = self.search_query.execute()?.candidates; + + match self.query.as_ref() { + Some(query) => { + let is_prefix = true; + let starts = StartsWith(Str::new(get_first(query))); + let first = Intersection(build_dfa(query, 1, is_prefix), Complement(&starts)); + let second_dfa = build_dfa(query, 2, is_prefix); + let second = Intersection(&second_dfa, &starts); + let automaton = Union(first, &second); + + let mut stream = fst.search(automaton).into_stream(); + let mut result = vec![]; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + let key = FacetGroupKey { field_id, level: 0, left_bound: value }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => todo!("return an internal error"), + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + result.push(FacetSearchResult { value: value.to_string(), count }); + } + } + + Ok(result) + } + None => { + let mut stream = fst.stream(); + let mut result = vec![]; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + let key = FacetGroupKey { field_id, level: 0, left_bound: value }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => todo!("return an internal error"), + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + result.push(FacetSearchResult { value: value.to_string(), count }); + } + } + + Ok(result) + } + } + } +} + +pub struct FacetSearchResult { + /// The original facet value + pub value: String, + /// The number of documents associated to this facet + pub count: u64, +} + #[cfg(test)] mod test { #[allow(unused_imports)] diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs deleted file mode 100644 index 8b1378917..000000000 --- a/milli/src/update/facets.rs +++ /dev/null @@ -1 +0,0 @@ - From addb21f110697c1efc3082db449e21d294d73cf3 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 13 Apr 2023 18:16:08 +0200 Subject: [PATCH 03/38] Restrict the number of facet search results to 1000 --- milli/src/search/mod.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 1e648d241..184ba409e 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -21,6 +21,9 @@ static LEVDIST0: Lazy = Lazy::new(|| LevBuilder::new(0, true)); static LEVDIST1: Lazy = Lazy::new(|| LevBuilder::new(1, true)); static LEVDIST2: Lazy = Lazy::new(|| LevBuilder::new(2, true)); +/// The maximum number of facets returned by the facet search route. +const MAX_NUMBER_OF_FACETS: usize = 1000; + pub mod facet; mod fst_utils; pub mod new; @@ -291,6 +294,7 @@ impl<'a> SearchForFacetValue<'a> { let mut stream = fst.search(automaton).into_stream(); let mut result = vec![]; + let mut length = 0; while let Some(facet_value) = stream.next() { let value = std::str::from_utf8(facet_value)?; let key = FacetGroupKey { field_id, level: 0, left_bound: value }; @@ -301,6 +305,10 @@ impl<'a> SearchForFacetValue<'a> { let count = search_candidates.intersection_len(&docids); if count != 0 { result.push(FacetSearchResult { value: value.to_string(), count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; } } @@ -309,6 +317,7 @@ impl<'a> SearchForFacetValue<'a> { None => { let mut stream = fst.stream(); let mut result = vec![]; + let mut length = 0; while let Some(facet_value) = stream.next() { let value = std::str::from_utf8(facet_value)?; let key = FacetGroupKey { field_id, level: 0, left_bound: value }; @@ -319,6 +328,10 @@ impl<'a> SearchForFacetValue<'a> { let count = search_candidates.intersection_len(&docids); if count != 0 { result.push(FacetSearchResult { value: value.to_string(), count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; } } From ce7e7f12c8fd947f6225f7578ea373ce7e2ec94a Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 13 Apr 2023 18:16:33 +0200 Subject: [PATCH 04/38] Introduce the facet search route --- .../src/routes/indexes/facet_search.rs | 178 ++++++++++++++++++ meilisearch/src/routes/indexes/mod.rs | 2 + meilisearch/src/search.rs | 48 ++++- milli/src/search/mod.rs | 1 + 4 files changed, 220 insertions(+), 9 deletions(-) create mode 100644 meilisearch/src/routes/indexes/facet_search.rs diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs new file mode 100644 index 000000000..be3af4f3e --- /dev/null +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -0,0 +1,178 @@ +use std::collections::{BTreeSet, HashSet}; + +use actix_web::web::Data; +use actix_web::{web, HttpRequest, HttpResponse}; +use deserr::actix_web::{AwebJson, AwebQueryParameter}; +use index_scheduler::IndexScheduler; +use log::debug; +use meilisearch_types::deserr::query_params::Param; +use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; +use meilisearch_types::error::deserr_codes::*; +use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::facet; +use meilisearch_types::serde_cs::vec::CS; +use serde_json::Value; + +use crate::analytics::{Analytics, SearchAggregator}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::search::{ + add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, + DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, + DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, +}; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service(web::resource("").route(web::post().to(search))); +} + +// #[derive(Debug, deserr::Deserr)] +// #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +// pub struct FacetSearchQuery { +// #[deserr(default, error = DeserrQueryParamError)] +// facetQuery: Option, +// #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError)] +// offset: Param, +// #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError)] +// limit: Param, +// #[deserr(default, error = DeserrQueryParamError)] +// page: Option>, +// #[deserr(default, error = DeserrQueryParamError)] +// hits_per_page: Option>, +// #[deserr(default, error = DeserrQueryParamError)] +// attributes_to_retrieve: Option>, +// #[deserr(default, error = DeserrQueryParamError)] +// attributes_to_crop: Option>, +// #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError)] +// crop_length: Param, +// #[deserr(default, error = DeserrQueryParamError)] +// attributes_to_highlight: Option>, +// #[deserr(default, error = DeserrQueryParamError)] +// filter: Option, +// #[deserr(default, error = DeserrQueryParamError)] +// sort: Option, +// #[deserr(default, error = DeserrQueryParamError)] +// show_matches_position: Param, +// #[deserr(default, error = DeserrQueryParamError)] +// facets: Option>, +// #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] +// highlight_pre_tag: String, +// #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError)] +// highlight_post_tag: String, +// #[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError)] +// crop_marker: String, +// #[deserr(default, error = DeserrQueryParamError)] +// matching_strategy: MatchingStrategy, +// } + +// TODO improve the error messages +#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct FacetSearchQuery { + #[deserr(default, error = DeserrJsonError)] + pub facet_query: Option, + #[deserr(default, error = DeserrJsonError)] + pub facet_name: String, + #[deserr(default, error = DeserrJsonError)] + pub q: Option, + #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] + pub offset: usize, + #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] + pub limit: usize, + #[deserr(default, error = DeserrJsonError)] + pub page: Option, + #[deserr(default, error = DeserrJsonError)] + pub hits_per_page: Option, + #[deserr(default, error = DeserrJsonError)] + pub attributes_to_retrieve: Option>, + #[deserr(default, error = DeserrJsonError)] + pub attributes_to_crop: Option>, + #[deserr(default, error = DeserrJsonError, default = DEFAULT_CROP_LENGTH())] + pub crop_length: usize, + #[deserr(default, error = DeserrJsonError)] + pub attributes_to_highlight: Option>, + #[deserr(default, error = DeserrJsonError, default)] + pub show_matches_position: bool, + #[deserr(default, error = DeserrJsonError)] + pub filter: Option, + #[deserr(default, error = DeserrJsonError)] + pub sort: Option>, + #[deserr(default, error = DeserrJsonError)] + pub facets: Option>, + #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] + pub highlight_pre_tag: String, + #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] + pub highlight_post_tag: String, + #[deserr(default, error = DeserrJsonError, default = DEFAULT_CROP_MARKER())] + pub crop_marker: String, + #[deserr(default, error = DeserrJsonError, default)] + pub matching_strategy: MatchingStrategy, +} + +pub async fn search( + index_scheduler: GuardedData, Data>, + index_uid: web::Path, + params: AwebJson, + req: HttpRequest, + analytics: web::Data, +) -> Result { + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + + let mut query = params.into_inner(); + debug!("facet search called with params: {:?}", query); + + let facet_query = query.facet_query.clone(); + let facet_name = query.facet_name.clone(); + let mut search_query = SearchQuery::from(query); + + // Tenant token search_rules. + if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { + add_search_rules(&mut search_query, search_rules); + } + + // TODO log stuff + // let mut aggregate = SearchAggregator::from_query(&query, &req); + + let index = index_scheduler.index(&index_uid)?; + let search_result = tokio::task::spawn_blocking(move || { + perform_facet_search(&index, search_query, facet_query, facet_name) + }) + .await?; + + // TODO log stuff + // if let Ok(ref search_result) = search_result { + // aggregate.succeed(search_result); + // } + // TODO analytics + // analytics.post_search(aggregate); + + let search_result = search_result?; + + debug!("returns: {:?}", search_result); + Ok(HttpResponse::Ok().json(search_result)) +} + +impl From for SearchQuery { + fn from(value: FacetSearchQuery) -> Self { + SearchQuery { + q: value.q, + offset: value.offset, + limit: value.limit, + page: value.page, + hits_per_page: value.hits_per_page, + attributes_to_retrieve: value.attributes_to_retrieve, + attributes_to_crop: value.attributes_to_crop, + crop_length: value.crop_length, + attributes_to_highlight: value.attributes_to_highlight, + show_matches_position: value.show_matches_position, + filter: value.filter, + sort: value.sort, + facets: value.facets, + highlight_pre_tag: value.highlight_pre_tag, + highlight_post_tag: value.highlight_post_tag, + crop_marker: value.crop_marker, + matching_strategy: value.matching_strategy, + } + } +} diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index ba925b3d5..81b5c3f2e 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; pub mod documents; +pub mod facet_search; pub mod search; pub mod settings; @@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats)))) .service(web::scope("/documents").configure(documents::configure)) .service(web::scope("/search").configure(search::configure)) + .service(web::scope("/facet-search").configure(facet_search::configure)) .service(web::scope("/settings").configure(settings::configure)), ); } diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 87cfdadb3..2093a27ef 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -3,6 +3,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::str::FromStr; use std::time::Instant; +use actix_http::header::q; use deserr::Deserr; use either::Either; use index_scheduler::RoFeatures; @@ -10,9 +11,10 @@ use log::warn; use meilisearch_auth::IndexSearchRules; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; +use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; -use meilisearch_types::milli::{dot_product_similarity, InternalError}; +use meilisearch_types::milli::{dot_product_similarity, FacetSearchResult, InternalError}; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; use milli::tokenizer::TokenizerBuilder; @@ -26,6 +28,7 @@ use serde::Serialize; use serde_json::{json, Value}; use crate::error::MeilisearchHttpError; +use crate::routes::indexes::facet_search::FacetSearchQuery; type MatchesPosition = BTreeMap>; @@ -199,7 +202,7 @@ impl SearchQueryWithIndex { } } -#[derive(Debug, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)] #[deserr(rename_all = camelCase)] pub enum MatchingStrategy { /// Remove query words from last to first @@ -298,14 +301,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) { } } -pub fn perform_search( - index: &Index, - query: SearchQuery, +fn prepare_search<'t>( + index: &'t Index, + rtxn: &'t RoTxn, + query: &'t SearchQuery, features: RoFeatures, -) -> Result { - let before_search = Instant::now(); - let rtxn = index.read_txn()?; - +) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { let mut search = index.search(&rtxn); if query.vector.is_some() && query.q.is_some() { @@ -383,6 +384,20 @@ pub fn perform_search( search.sort_criteria(sort); } + Ok((search, is_finite_pagination, max_total_hits, offset)) +} + +pub fn perform_search( + index: &Index, + query: SearchQuery, + features: RoFeatures, +) -> Result { + let before_search = Instant::now(); + let rtxn = index.read_txn()?; + + let (search, is_finite_pagination, max_total_hits, offset) = + prepare_search(index, &rtxn, &query, features)?; + let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } = search.execute()?; @@ -557,6 +572,21 @@ pub fn perform_search( Ok(result) } +pub fn perform_facet_search( + index: &Index, + search_query: SearchQuery, + facet_query: Option, + facet_name: String, +) -> Result, MeilisearchHttpError> { + let before_search = Instant::now(); + let rtxn = index.read_txn()?; + + let (search, is_finite_pagination, max_total_hits, offset) = + prepare_search(index, &rtxn, &search_query)?; + + todo!("Execute the search") +} + fn insert_geo_distance(sorts: &[String], document: &mut Document) { lazy_static::lazy_static! { static ref GEO_REGEX: Regex = diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 184ba409e..85ba3df3a 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -341,6 +341,7 @@ impl<'a> SearchForFacetValue<'a> { } } +#[derive(Debug, serde::Serialize)] pub struct FacetSearchResult { /// The original facet value pub value: String, From e81809aae70d19a32a1c1708f68b774a0ccaf70e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 26 Apr 2023 11:55:43 +0200 Subject: [PATCH 05/38] Make the search for facet work --- meilisearch/src/routes/indexes/facet_search.rs | 2 +- meilisearch/src/search.rs | 15 ++++++++++----- milli/src/search/mod.rs | 12 ++++++------ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index be3af4f3e..16479c755 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -119,7 +119,7 @@ pub async fn search( ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - let mut query = params.into_inner(); + let query = params.into_inner(); debug!("facet search called with params: {:?}", query); let facet_query = query.facet_query.clone(); diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 2093a27ef..cd6a0bc54 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -14,7 +14,9 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; -use meilisearch_types::milli::{dot_product_similarity, FacetSearchResult, InternalError}; +use meilisearch_types::milli::{ + dot_product_similarity, facet, FacetSearchResult, InternalError, SearchForFacetValue, +}; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; use milli::tokenizer::TokenizerBuilder; @@ -575,16 +577,19 @@ pub fn perform_search( pub fn perform_facet_search( index: &Index, search_query: SearchQuery, - facet_query: Option, + mut facet_query: Option, facet_name: String, ) -> Result, MeilisearchHttpError> { let before_search = Instant::now(); let rtxn = index.read_txn()?; - let (search, is_finite_pagination, max_total_hits, offset) = - prepare_search(index, &rtxn, &search_query)?; + let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?; + let mut facet_search = SearchForFacetValue::new(facet_name, search); + if let Some(facet_query) = facet_query.take() { + facet_search.query(facet_query); + } - todo!("Execute the search") + facet_search.execute().map_err(Into::into) } fn insert_geo_distance(sorts: &[String], document: &mut Document) { diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 85ba3df3a..bc88a0ff8 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -248,25 +248,25 @@ pub struct SearchForFacetValue<'a> { } impl<'a> SearchForFacetValue<'a> { - fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValue<'a> { + pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValue<'a> { SearchForFacetValue { query: None, facet, search_query } } - fn query(&mut self, query: impl Into) -> &mut Self { + pub fn query(&mut self, query: impl Into) -> &mut Self { self.query = Some(query.into()); self } - fn execute(&self) -> Result> { + pub fn execute(&self) -> Result> { let index = self.search_query.index; let rtxn = self.search_query.rtxn; - let sortable_fields = index.sortable_fields(rtxn)?; - if !sortable_fields.contains(&self.facet) { + let filterable_fields = index.filterable_fields(rtxn)?; + if !filterable_fields.contains(&self.facet) { // TODO create a new type of error return Err(UserError::InvalidSortableAttribute { field: self.facet.clone(), - valid_fields: sortable_fields.into_iter().collect(), + valid_fields: filterable_fields.into_iter().collect(), })?; } From 893592c5e97b32c5d738d383c15cd8f187abc6cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 26 Apr 2023 17:08:55 +0200 Subject: [PATCH 06/38] Send analytics about the facet-search route --- meilisearch/src/analytics/mock_analytics.rs | 13 + meilisearch/src/analytics/mod.rs | 7 + .../src/analytics/segment_analytics.rs | 230 +++++++++++++++--- .../src/routes/indexes/facet_search.rs | 17 +- 4 files changed, 225 insertions(+), 42 deletions(-) diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs index 68c3a7dff..4bd190f87 100644 --- a/meilisearch/src/analytics/mock_analytics.rs +++ b/meilisearch/src/analytics/mock_analytics.rs @@ -38,6 +38,18 @@ impl MultiSearchAggregator { pub fn succeed(&mut self) {} } +#[derive(Default)] +pub struct FacetSearchAggregator; + +#[allow(dead_code)] +impl FacetSearchAggregator { + pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { + Self::default() + } + + pub fn succeed(&mut self, _: &dyn Any) {} +} + impl MockAnalytics { #[allow(clippy::new_ret_no_self)] pub fn new(opt: &Opt) -> Arc { @@ -56,6 +68,7 @@ impl Analytics for MockAnalytics { fn get_search(&self, _aggregate: super::SearchAggregator) {} fn post_search(&self, _aggregate: super::SearchAggregator) {} fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {} + fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {} fn add_documents( &self, _documents_query: &UpdateDocumentsQuery, diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index c48564dff..86f9c1abe 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics; pub type SearchAggregator = mock_analytics::SearchAggregator; #[cfg(any(debug_assertions, not(feature = "analytics")))] pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator; +#[cfg(any(debug_assertions, not(feature = "analytics")))] +pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator; // if we are in release mode and the feature analytics was enabled // we use the real analytics @@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; pub type SearchAggregator = segment_analytics::SearchAggregator; #[cfg(all(not(debug_assertions), feature = "analytics"))] pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; /// The Meilisearch config dir: /// `~/.config/Meilisearch` on *NIX or *BSD. @@ -88,6 +92,9 @@ pub trait Analytics: Sync + Send { /// This method should be called to aggregate a post array of searches fn post_multi_search(&self, aggregate: MultiSearchAggregator); + /// This method should be called to aggregate post facet values searches + fn post_facet_search(&self, aggregate: FacetSearchAggregator); + // this method should be called to aggregate a add documents request fn add_documents( &self, diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 9a96c4650..4508fa26e 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,5 +1,6 @@ use std::collections::{BinaryHeap, HashMap, HashSet}; use std::fs; +use std::mem::take; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -29,11 +30,13 @@ use super::{ use crate::analytics::Analytics; use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot}; use crate::routes::indexes::documents::UpdateDocumentsQuery; +use crate::routes::indexes::facet_search::FacetSearchQuery; use crate::routes::tasks::TasksFilterQuery; use crate::routes::{create_all_stats, Stats}; use crate::search::{ - SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult, + DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, + DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, }; use crate::Opt; @@ -71,6 +74,7 @@ pub enum AnalyticsMsg { AggregateGetSearch(SearchAggregator), AggregatePostSearch(SearchAggregator), AggregatePostMultiSearch(MultiSearchAggregator), + AggregatePostFacetSearch(FacetSearchAggregator), AggregateAddDocuments(DocumentsAggregator), AggregateDeleteDocuments(DocumentsDeletionAggregator), AggregateUpdateDocuments(DocumentsAggregator), @@ -139,6 +143,7 @@ impl SegmentAnalytics { batcher, post_search_aggregator: SearchAggregator::default(), post_multi_search_aggregator: MultiSearchAggregator::default(), + post_facet_search_aggregator: FacetSearchAggregator::default(), get_search_aggregator: SearchAggregator::default(), add_documents_aggregator: DocumentsAggregator::default(), delete_documents_aggregator: DocumentsDeletionAggregator::default(), @@ -182,6 +187,10 @@ impl super::Analytics for SegmentAnalytics { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); } + fn post_facet_search(&self, aggregate: FacetSearchAggregator) { + let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate)); + } + fn post_multi_search(&self, aggregate: MultiSearchAggregator) { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate)); } @@ -354,6 +363,7 @@ pub struct Segment { get_search_aggregator: SearchAggregator, post_search_aggregator: SearchAggregator, post_multi_search_aggregator: MultiSearchAggregator, + post_facet_search_aggregator: FacetSearchAggregator, add_documents_aggregator: DocumentsAggregator, delete_documents_aggregator: DocumentsDeletionAggregator, update_documents_aggregator: DocumentsAggregator, @@ -418,6 +428,7 @@ impl Segment { Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg), + Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), @@ -461,55 +472,72 @@ impl Segment { }) .await; } - let get_search = std::mem::take(&mut self.get_search_aggregator) - .into_event(&self.user, "Documents Searched GET"); - let post_search = std::mem::take(&mut self.post_search_aggregator) - .into_event(&self.user, "Documents Searched POST"); - let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator) - .into_event(&self.user, "Documents Searched by Multi-Search POST"); - let add_documents = std::mem::take(&mut self.add_documents_aggregator) - .into_event(&self.user, "Documents Added"); - let delete_documents = std::mem::take(&mut self.delete_documents_aggregator) - .into_event(&self.user, "Documents Deleted"); - let update_documents = std::mem::take(&mut self.update_documents_aggregator) - .into_event(&self.user, "Documents Updated"); - let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator) - .into_event(&self.user, "Documents Fetched GET"); - let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator) - .into_event(&self.user, "Documents Fetched POST"); - let get_tasks = - std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen"); - let health = - std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen"); - if let Some(get_search) = get_search { + let Segment { + inbox: _, + opt: _, + batcher: _, + user, + get_search_aggregator, + post_search_aggregator, + post_multi_search_aggregator, + post_facet_search_aggregator, + add_documents_aggregator, + delete_documents_aggregator, + update_documents_aggregator, + get_fetch_documents_aggregator, + post_fetch_documents_aggregator, + get_tasks_aggregator, + health_aggregator, + } = self; + + if let Some(get_search) = + take(get_search_aggregator).into_event(&user, "Documents Searched GET") + { let _ = self.batcher.push(get_search).await; } - if let Some(post_search) = post_search { + if let Some(post_search) = + take(post_search_aggregator).into_event(&user, "Documents Searched POST") + { let _ = self.batcher.push(post_search).await; } - if let Some(post_multi_search) = post_multi_search { + if let Some(post_multi_search) = take(post_multi_search_aggregator) + .into_event(&user, "Documents Searched by Multi-Search POST") + { let _ = self.batcher.push(post_multi_search).await; } - if let Some(add_documents) = add_documents { + if let Some(post_facet_search) = take(post_facet_search_aggregator) + .into_event(&user, "Documents Searched by Facet-Search POST") + { + let _ = self.batcher.push(post_facet_search).await; + } + if let Some(add_documents) = + take(add_documents_aggregator).into_event(&user, "Documents Added") + { let _ = self.batcher.push(add_documents).await; } - if let Some(delete_documents) = delete_documents { + if let Some(delete_documents) = + take(delete_documents_aggregator).into_event(&user, "Documents Deleted") + { let _ = self.batcher.push(delete_documents).await; } - if let Some(update_documents) = update_documents { + if let Some(update_documents) = + take(update_documents_aggregator).into_event(&user, "Documents Updated") + { let _ = self.batcher.push(update_documents).await; } - if let Some(get_fetch_documents) = get_fetch_documents { + if let Some(get_fetch_documents) = + take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET") { let _ = self.batcher.push(get_fetch_documents).await; } - if let Some(post_fetch_documents) = post_fetch_documents { + if let Some(post_fetch_documents) = + take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST") { let _ = self.batcher.push(post_fetch_documents).await; } - if let Some(get_tasks) = get_tasks { + if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") { let _ = self.batcher.push(get_tasks).await; } - if let Some(health) = health { + if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") { let _ = self.batcher.push(health).await; } let _ = self.batcher.flush().await; @@ -909,6 +937,144 @@ impl MultiSearchAggregator { } } +#[derive(Default)] +pub struct FacetSearchAggregator { + timestamp: Option, + + // context + user_agents: HashSet, + + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap, + + // The set of all facetNames that were used + facet_names: HashSet, + + // As there been any other parameter than the facetName or facetQuery ones? + additional_search_parameters_provided: bool, +} + +impl FacetSearchAggregator { + pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { + let FacetSearchQuery { + facet_query: _, + facet_name, + q, + offset, + limit, + page, + hits_per_page, + attributes_to_retrieve, + attributes_to_crop, + crop_length, + attributes_to_highlight, + show_matches_position, + filter, + sort, + facets, + highlight_pre_tag, + highlight_post_tag, + crop_marker, + matching_strategy, + } = query; + + let mut ret = Self::default(); + ret.timestamp = Some(OffsetDateTime::now_utc()); + + ret.total_received = 1; + ret.user_agents = extract_user_agents(request).into_iter().collect(); + ret.facet_names = Some(facet_name.clone()).into_iter().collect(); + + ret.additional_search_parameters_provided = q.is_some() + || *offset != DEFAULT_SEARCH_OFFSET() + || *limit != DEFAULT_SEARCH_LIMIT() + || page.is_some() + || hits_per_page.is_some() + || attributes_to_retrieve.is_some() + || attributes_to_crop.is_some() + || *crop_length != DEFAULT_CROP_LENGTH() + || attributes_to_highlight.is_some() + || *show_matches_position + || filter.is_some() + || sort.is_some() + || facets.is_some() + || *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG() + || *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG() + || *crop_marker != DEFAULT_CROP_MARKER() + || *matching_strategy != MatchingStrategy::default(); + + ret + } + + pub fn succeed(&mut self, result: &FacetSearchResult) { + self.total_succeeded = self.total_succeeded.saturating_add(1); + self.time_spent.push(result.processing_time_ms as usize); + } + + /// Aggregate one [SearchAggregator] into another. + pub fn aggregate(&mut self, mut other: Self) { + if self.timestamp.is_none() { + self.timestamp = other.timestamp; + } + + // context + for user_agent in other.user_agents.into_iter() { + self.user_agents.insert(user_agent); + } + + // request + self.total_received = self.total_received.saturating_add(other.total_received); + self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded); + self.time_spent.append(&mut other.time_spent); + + // facet_names + for facet_name in other.facet_names.into_iter() { + self.facet_names.insert(facet_name); + } + + // additional_search_parameters_provided + self.additional_search_parameters_provided = self.additional_search_parameters_provided + | other.additional_search_parameters_provided; + } + + pub fn into_event(self, user: &User, event_name: &str) -> Option { + if self.total_received == 0 { + None + } else { + // the index of the 99th percentage of value + let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.; + // we get all the values in a sorted manner + let time_spent = self.time_spent.into_sorted_vec(); + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th as usize); + + let properties = json!({ + "user-agent": self.user_agents, + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": self.total_succeeded, + "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics + "total_received": self.total_received, + }, + "facets": { + "total_distinct_facet_count": self.facet_names.len(), + }, + "additional_search_parameters_provided": self.additional_search_parameters_provided, + }); + + Some(Track { + timestamp: self.timestamp, + user: user.clone(), + event: event_name.to_string(), + properties, + ..Default::default() + }) + } + } +} + #[derive(Default)] pub struct DocumentsAggregator { timestamp: Option, diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 16479c755..947454d12 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -14,7 +14,7 @@ use meilisearch_types::milli::facet; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; -use crate::analytics::{Analytics, SearchAggregator}; +use crate::analytics::{Analytics, FacetSearchAggregator}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::search::{ @@ -122,6 +122,8 @@ pub async fn search( let query = params.into_inner(); debug!("facet search called with params: {:?}", query); + let mut aggregate = FacetSearchAggregator::from_query(&query, &req); + let facet_query = query.facet_query.clone(); let facet_name = query.facet_name.clone(); let mut search_query = SearchQuery::from(query); @@ -131,21 +133,16 @@ pub async fn search( add_search_rules(&mut search_query, search_rules); } - // TODO log stuff - // let mut aggregate = SearchAggregator::from_query(&query, &req); - let index = index_scheduler.index(&index_uid)?; let search_result = tokio::task::spawn_blocking(move || { perform_facet_search(&index, search_query, facet_query, facet_name) }) .await?; - // TODO log stuff - // if let Ok(ref search_result) = search_result { - // aggregate.succeed(search_result); - // } - // TODO analytics - // analytics.post_search(aggregate); + if let Ok(ref search_result) = search_result { + aggregate.succeed(search_result); + } + analytics.post_facet_search(aggregate); let search_result = search_result?; From 93f30e65a91beb80fe19736fa655c1577109795b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 26 Apr 2023 17:09:59 +0200 Subject: [PATCH 07/38] Return the correct response JSON object from the facet-search route --- .../src/routes/indexes/facet_search.rs | 46 +------------------ meilisearch/src/search.rs | 26 ++++++++--- milli/src/heed_codec/fst_set_codec.rs | 2 +- milli/src/lib.rs | 2 +- milli/src/search/mod.rs | 10 ++-- 5 files changed, 28 insertions(+), 58 deletions(-) diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 947454d12..3f97e2858 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -2,16 +2,13 @@ use std::collections::{BTreeSet, HashSet}; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; -use deserr::actix_web::{AwebJson, AwebQueryParameter}; +use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; use log::debug; -use meilisearch_types::deserr::query_params::Param; -use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; +use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; -use meilisearch_types::milli::facet; -use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use crate::analytics::{Analytics, FacetSearchAggregator}; @@ -27,45 +24,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(search))); } -// #[derive(Debug, deserr::Deserr)] -// #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] -// pub struct FacetSearchQuery { -// #[deserr(default, error = DeserrQueryParamError)] -// facetQuery: Option, -// #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError)] -// offset: Param, -// #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError)] -// limit: Param, -// #[deserr(default, error = DeserrQueryParamError)] -// page: Option>, -// #[deserr(default, error = DeserrQueryParamError)] -// hits_per_page: Option>, -// #[deserr(default, error = DeserrQueryParamError)] -// attributes_to_retrieve: Option>, -// #[deserr(default, error = DeserrQueryParamError)] -// attributes_to_crop: Option>, -// #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError)] -// crop_length: Param, -// #[deserr(default, error = DeserrQueryParamError)] -// attributes_to_highlight: Option>, -// #[deserr(default, error = DeserrQueryParamError)] -// filter: Option, -// #[deserr(default, error = DeserrQueryParamError)] -// sort: Option, -// #[deserr(default, error = DeserrQueryParamError)] -// show_matches_position: Param, -// #[deserr(default, error = DeserrQueryParamError)] -// facets: Option>, -// #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] -// highlight_pre_tag: String, -// #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError)] -// highlight_post_tag: String, -// #[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError)] -// crop_marker: String, -// #[deserr(default, error = DeserrQueryParamError)] -// matching_strategy: MatchingStrategy, -// } - // TODO improve the error messages #[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index cd6a0bc54..adf9a01e6 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -3,7 +3,6 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::str::FromStr; use std::time::Instant; -use actix_http::header::q; use deserr::Deserr; use either::Either; use index_scheduler::RoFeatures; @@ -15,7 +14,7 @@ use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::{ - dot_product_similarity, facet, FacetSearchResult, InternalError, SearchForFacetValue, + dot_product_similarity, FacetValueHit, InternalError, SearchForFacetValue, }; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; @@ -30,7 +29,6 @@ use serde::Serialize; use serde_json::{json, Value}; use crate::error::MeilisearchHttpError; -use crate::routes::indexes::facet_search::FacetSearchQuery; type MatchesPosition = BTreeMap>; @@ -283,6 +281,14 @@ pub struct FacetStats { pub max: f64, } +#[derive(Serialize, Debug, Clone, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct FacetSearchResult { + pub hits: Vec, + pub query: Option, + pub processing_time_ms: u128, +} + /// Incorporate search rules in search query pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) { query.filter = match (query.filter.take(), rules.filter) { @@ -577,19 +583,25 @@ pub fn perform_search( pub fn perform_facet_search( index: &Index, search_query: SearchQuery, - mut facet_query: Option, + facet_query: Option, facet_name: String, -) -> Result, MeilisearchHttpError> { +) -> Result { let before_search = Instant::now(); let rtxn = index.read_txn()?; let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?; let mut facet_search = SearchForFacetValue::new(facet_name, search); - if let Some(facet_query) = facet_query.take() { + if let Some(facet_query) = &facet_query { facet_search.query(facet_query); } - facet_search.execute().map_err(Into::into) + let hits = facet_search.execute()?; + + Ok(FacetSearchResult { + hits, + query: facet_query, + processing_time_ms: before_search.elapsed().as_millis(), + }) } fn insert_geo_distance(sorts: &[String], document: &mut Document) { diff --git a/milli/src/heed_codec/fst_set_codec.rs b/milli/src/heed_codec/fst_set_codec.rs index a8d35ebda..551f04678 100644 --- a/milli/src/heed_codec/fst_set_codec.rs +++ b/milli/src/heed_codec/fst_set_codec.rs @@ -1,6 +1,6 @@ -use fst::Set; use std::borrow::Cow; +use fst::Set; use heed::{BytesDecode, BytesEncode}; /// A codec for values of type `Set<&[u8]>`. diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 6f14977ec..84ca0ce97 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -57,7 +57,7 @@ pub use self::heed_codec::{ }; pub use self::index::Index; pub use self::search::{ - FacetDistribution, FacetSearchResult, Filter, FormatOptions, MatchBounds, MatcherBuilder, + FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search, SearchForFacetValue, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, }; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index bc88a0ff8..0f71fb172 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -257,7 +257,7 @@ impl<'a> SearchForFacetValue<'a> { self } - pub fn execute(&self) -> Result> { + pub fn execute(&self) -> Result> { let index = self.search_query.index; let rtxn = self.search_query.rtxn; @@ -304,7 +304,7 @@ impl<'a> SearchForFacetValue<'a> { }; let count = search_candidates.intersection_len(&docids); if count != 0 { - result.push(FacetSearchResult { value: value.to_string(), count }); + result.push(FacetValueHit { value: value.to_string(), count }); length += 1; } if length >= MAX_NUMBER_OF_FACETS { @@ -327,7 +327,7 @@ impl<'a> SearchForFacetValue<'a> { }; let count = search_candidates.intersection_len(&docids); if count != 0 { - result.push(FacetSearchResult { value: value.to_string(), count }); + result.push(FacetValueHit { value: value.to_string(), count }); length += 1; } if length >= MAX_NUMBER_OF_FACETS { @@ -341,8 +341,8 @@ impl<'a> SearchForFacetValue<'a> { } } -#[derive(Debug, serde::Serialize)] -pub struct FacetSearchResult { +#[derive(Debug, Clone, serde::Serialize, PartialEq)] +pub struct FacetValueHit { /// The original facet value pub value: String, /// The number of documents associated to this facet From a05074e67551f8e8a2bc9a9f12d7c8bca3b29c4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 26 Apr 2023 17:19:02 +0200 Subject: [PATCH 08/38] Fix the max number of facets to be returned to 100 --- milli/src/search/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 0f71fb172..211e6a0e8 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -22,7 +22,7 @@ static LEVDIST1: Lazy = Lazy::new(|| LevBuilder::new(1, true)); static LEVDIST2: Lazy = Lazy::new(|| LevBuilder::new(2, true)); /// The maximum number of facets returned by the facet search route. -const MAX_NUMBER_OF_FACETS: usize = 1000; +const MAX_NUMBER_OF_FACETS: usize = 100; pub mod facet; mod fst_utils; From 702041b7e19d4af4daef712811514267bfc2cdf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 26 Apr 2023 18:09:24 +0200 Subject: [PATCH 09/38] Improve the returned errors from the facet-search route --- meilisearch-types/src/error.rs | 4 ++++ meilisearch/src/routes/indexes/facet_search.rs | 4 ++-- milli/src/error.rs | 10 ++++++++++ milli/src/search/mod.rs | 3 +-- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 09c8b7b8c..1a87b2707 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -242,11 +242,14 @@ InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ; InvalidSearchOffset , InvalidRequest , BAD_REQUEST ; InvalidSearchPage , InvalidRequest , BAD_REQUEST ; InvalidSearchQ , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ; InvalidSearchVector , InvalidRequest , BAD_REQUEST ; InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSearchSort , InvalidRequest , BAD_REQUEST ; +InvalidSearchFacet , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; @@ -340,6 +343,7 @@ impl ErrorCode for milli::Error { UserError::InvalidSearchableAttribute { .. } => { Code::InvalidAttributesToSearchOn } + UserError::InvalidSearchFacet { .. } => Code::InvalidSearchFacet, UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 3f97e2858..3633057e8 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -28,9 +28,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) { #[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct FacetSearchQuery { - #[deserr(default, error = DeserrJsonError)] + #[deserr(default, error = DeserrJsonError)] pub facet_query: Option, - #[deserr(default, error = DeserrJsonError)] + #[deserr(error = DeserrJsonError)] pub facet_name: String, #[deserr(default, error = DeserrJsonError)] pub q: Option, diff --git a/milli/src/error.rs b/milli/src/error.rs index 61597faf3..81be9a215 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -128,6 +128,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco } )] InvalidSortableAttribute { field: String, valid_fields: BTreeSet }, + #[error("Attribute `{}` is not filterable. {}", + .field, + match .valid_fields.is_empty() { + true => "This index does not have configured filterable attributes.".to_string(), + false => format!("Available filterable attributes are: `{}`.", + valid_fields.iter().map(AsRef::as_ref).collect::>().join(", ") + ), + } + )] + InvalidSearchFacet { field: String, valid_fields: BTreeSet }, #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.", .field, .valid_fields.iter().map(AsRef::as_ref).collect::>().join(", "), diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 211e6a0e8..87a46229d 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -263,8 +263,7 @@ impl<'a> SearchForFacetValue<'a> { let filterable_fields = index.filterable_fields(rtxn)?; if !filterable_fields.contains(&self.facet) { - // TODO create a new type of error - return Err(UserError::InvalidSortableAttribute { + return Err(UserError::InvalidSearchFacet { field: self.facet.clone(), valid_fields: filterable_fields.into_iter().collect(), })?; From f36de2115f484e524f35c81de86bca098eb9ca64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 26 Apr 2023 18:35:06 +0200 Subject: [PATCH 10/38] Make clippy happy --- meilisearch/src/search.rs | 4 ++-- milli/src/heed_codec/fst_set_codec.rs | 2 +- milli/src/update/facet/mod.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index adf9a01e6..5cb8725a6 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -315,7 +315,7 @@ fn prepare_search<'t>( query: &'t SearchQuery, features: RoFeatures, ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { - let mut search = index.search(&rtxn); + let mut search = index.search(rtxn); if query.vector.is_some() && query.q.is_some() { warn!("Ignoring the query string `q` when used with the `vector` parameter."); @@ -337,7 +337,7 @@ fn prepare_search<'t>( search.terms_matching_strategy(query.matching_strategy.into()); let max_total_hits = index - .pagination_max_total_hits(&rtxn) + .pagination_max_total_hits(rtxn) .map_err(milli::Error::from)? .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); diff --git a/milli/src/heed_codec/fst_set_codec.rs b/milli/src/heed_codec/fst_set_codec.rs index 551f04678..fc79acf29 100644 --- a/milli/src/heed_codec/fst_set_codec.rs +++ b/milli/src/heed_codec/fst_set_codec.rs @@ -18,6 +18,6 @@ impl<'a> BytesDecode<'a> for FstSetCodec { type DItem = Set<&'a [u8]>; fn bytes_decode(bytes: &'a [u8]) -> Option { - Some(Set::new(bytes).ok()?) + Set::new(bytes).ok() } } diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 4073ab8e5..0752f7c37 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -163,7 +163,7 @@ impl<'i> FacetsUpdate<'i> { let mut text_fsts = vec![]; let mut current_fst: Option<(u16, fst::SetBuilder>)> = None; let database = self.index.facet_id_string_docids.remap_data_type::(); - for result in database.iter(&wtxn)? { + for result in database.iter(wtxn)? { let (facet_group_key, _) = result?; if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key { current_fst = match current_fst.take() { From aadbe88048c5eb853d1b7f9bce8f36df76882c73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 27 Apr 2023 10:42:48 +0200 Subject: [PATCH 11/38] Return an internal error when a field id is missing --- milli/src/search/mod.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 87a46229d..28a53e3a9 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -266,18 +266,24 @@ impl<'a> SearchForFacetValue<'a> { return Err(UserError::InvalidSearchFacet { field: self.facet.clone(), valid_fields: filterable_fields.into_iter().collect(), - })?; + } + .into()); } let fields_ids_map = index.fields_ids_map(rtxn)?; - let (field_id, fst) = match fields_ids_map.id(&self.facet) { - Some(fid) => { - match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { - Some(fst) => (fid, fst), - None => todo!("return an error, is the user trying to search in numbers?"), + let fid = match fields_ids_map.id(&self.facet) { + Some(fid) => fid, + None => { + return Err(FieldIdMapMissingEntry::FieldName { + field_name: self.facet.clone(), + process: "search for facet values", } + .into()); } - None => todo!("return an internal error bug"), + }; + let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { + Some(fst) => fst, + None => return Ok(vec![]), }; let search_candidates = self.search_query.execute()?.candidates; @@ -296,7 +302,7 @@ impl<'a> SearchForFacetValue<'a> { let mut length = 0; while let Some(facet_value) = stream.next() { let value = std::str::from_utf8(facet_value)?; - let key = FacetGroupKey { field_id, level: 0, left_bound: value }; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; let docids = match index.facet_id_string_docids.get(rtxn, &key)? { Some(FacetGroupValue { bitmap, .. }) => bitmap, None => todo!("return an internal error"), @@ -319,7 +325,7 @@ impl<'a> SearchForFacetValue<'a> { let mut length = 0; while let Some(facet_value) = stream.next() { let value = std::str::from_utf8(facet_value)?; - let key = FacetGroupKey { field_id, level: 0, left_bound: value }; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; let docids = match index.facet_id_string_docids.get(rtxn, &key)? { Some(FacetGroupValue { bitmap, .. }) => bitmap, None => todo!("return an internal error"), From 55c17aa38b44a88706d5f9a36203eced3346d79c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 27 Apr 2023 10:43:02 +0200 Subject: [PATCH 12/38] Rename the SearchForFacetValues struct --- meilisearch/src/search.rs | 4 ++-- milli/src/lib.rs | 2 +- milli/src/search/mod.rs | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 5cb8725a6..c32724ec0 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -14,7 +14,7 @@ use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::{ - dot_product_similarity, FacetValueHit, InternalError, SearchForFacetValue, + dot_product_similarity, FacetValueHit, InternalError, SearchForFacetValues, }; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; @@ -590,7 +590,7 @@ pub fn perform_facet_search( let rtxn = index.read_txn()?; let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?; - let mut facet_search = SearchForFacetValue::new(facet_name, search); + let mut facet_search = SearchForFacetValues::new(facet_name, search); if let Some(facet_query) = &facet_query { facet_search.query(facet_query); } diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 84ca0ce97..4360eb38e 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -58,7 +58,7 @@ pub use self::heed_codec::{ pub use self::index::Index; pub use self::search::{ FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder, - MatchingWords, Search, SearchForFacetValue, SearchResult, TermsMatchingStrategy, + MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, }; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 28a53e3a9..77f9ce0b8 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -13,7 +13,8 @@ use crate::error::UserError; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ - execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext, BEU16, + execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldIdMapMissingEntry, Index, + Result, SearchContext, BEU16, }; // Building these factories is not free. @@ -241,15 +242,15 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { } } -pub struct SearchForFacetValue<'a> { +pub struct SearchForFacetValues<'a> { query: Option, facet: String, search_query: Search<'a>, } -impl<'a> SearchForFacetValue<'a> { - pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValue<'a> { - SearchForFacetValue { query: None, facet, search_query } +impl<'a> SearchForFacetValues<'a> { + pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> { + SearchForFacetValues { query: None, facet, search_query } } pub fn query(&mut self, query: impl Into) -> &mut Self { From 8e86eb91bbb85402efa02f0fd7d26ac5676a5556 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 27 Apr 2023 10:50:42 +0200 Subject: [PATCH 13/38] Log an error when a facet value is missing from the database --- milli/src/search/mod.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 77f9ce0b8..114ca2a04 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -3,6 +3,7 @@ use std::fmt; use fst::automaton::{Complement, Intersection, StartsWith, Str, Union}; use fst::Streamer; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; +use log::{debug, error}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; @@ -306,7 +307,10 @@ impl<'a> SearchForFacetValues<'a> { let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; let docids = match index.facet_id_string_docids.get(rtxn, &key)? { Some(FacetGroupValue { bitmap, .. }) => bitmap, - None => todo!("return an internal error"), + None => { + error!("the facet value is missing from the facet database: {key:?}"); + continue; + } }; let count = search_candidates.intersection_len(&docids); if count != 0 { @@ -329,7 +333,10 @@ impl<'a> SearchForFacetValues<'a> { let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; let docids = match index.facet_id_string_docids.get(rtxn, &key)? { Some(FacetGroupValue { bitmap, .. }) => bitmap, - None => todo!("return an internal error"), + None => { + error!("the facet value is missing from the facet database: {key:?}"); + continue; + } }; let count = search_candidates.intersection_len(&docids); if count != 0 { From 7bd67543dde9599d98fd7f262668b9cc112327a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 27 Apr 2023 16:04:03 +0200 Subject: [PATCH 14/38] Support the typoTolerant.enabled parameter --- milli/src/search/mod.rs | 87 ++++++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 114ca2a04..ab7c336b4 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -283,6 +283,7 @@ impl<'a> SearchForFacetValues<'a> { .into()); } }; + let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { Some(fst) => fst, None => return Ok(vec![]), @@ -292,37 +293,69 @@ impl<'a> SearchForFacetValues<'a> { match self.query.as_ref() { Some(query) => { - let is_prefix = true; - let starts = StartsWith(Str::new(get_first(query))); - let first = Intersection(build_dfa(query, 1, is_prefix), Complement(&starts)); - let second_dfa = build_dfa(query, 2, is_prefix); - let second = Intersection(&second_dfa, &starts); - let automaton = Union(first, &second); + if self.search_query.index.authorize_typos(rtxn)? { + let is_prefix = true; + let starts = StartsWith(Str::new(get_first(query))); + let first = Intersection(build_dfa(query, 1, is_prefix), Complement(&starts)); + let second_dfa = build_dfa(query, 2, is_prefix); + let second = Intersection(&second_dfa, &starts); + let automaton = Union(first, &second); - let mut stream = fst.search(automaton).into_stream(); - let mut result = vec![]; - let mut length = 0; - while let Some(facet_value) = stream.next() { - let value = std::str::from_utf8(facet_value)?; - let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; - let docids = match index.facet_id_string_docids.get(rtxn, &key)? { - Some(FacetGroupValue { bitmap, .. }) => bitmap, - None => { - error!("the facet value is missing from the facet database: {key:?}"); - continue; + let mut stream = fst.search(automaton).into_stream(); + let mut result = vec![]; + let mut length = 0; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => { + error!( + "the facet value is missing from the facet database: {key:?}" + ); + continue; + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + result.push(FacetValueHit { value: value.to_string(), count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; } - }; - let count = search_candidates.intersection_len(&docids); - if count != 0 { - result.push(FacetValueHit { value: value.to_string(), count }); - length += 1; } - if length >= MAX_NUMBER_OF_FACETS { - break; - } - } - Ok(result) + Ok(result) + } else { + let automaton = StartsWith(Str::new(query)); + let mut stream = fst.search(automaton).into_stream(); + let mut result = vec![]; + let mut length = 0; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => { + error!( + "the facet value is missing from the facet database: {key:?}" + ); + continue; + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + result.push(FacetValueHit { value: value.to_string(), count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; + } + } + + Ok(result) + } } None => { let mut stream = fst.stream(); From 2ceb781c7344f025822e0935f3b3062d8445ed06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 27 Apr 2023 16:48:37 +0200 Subject: [PATCH 15/38] Use the disableOnWords parameter on the facet-search route --- milli/src/search/mod.rs | 68 +++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index ab7c336b4..2b02b1bf8 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -294,35 +294,51 @@ impl<'a> SearchForFacetValues<'a> { match self.query.as_ref() { Some(query) => { if self.search_query.index.authorize_typos(rtxn)? { - let is_prefix = true; - let starts = StartsWith(Str::new(get_first(query))); - let first = Intersection(build_dfa(query, 1, is_prefix), Complement(&starts)); - let second_dfa = build_dfa(query, 2, is_prefix); - let second = Intersection(&second_dfa, &starts); - let automaton = Union(first, &second); - - let mut stream = fst.search(automaton).into_stream(); let mut result = vec![]; - let mut length = 0; - while let Some(facet_value) = stream.next() { - let value = std::str::from_utf8(facet_value)?; - let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; - let docids = match index.facet_id_string_docids.get(rtxn, &key)? { - Some(FacetGroupValue { bitmap, .. }) => bitmap, - None => { - error!( - "the facet value is missing from the facet database: {key:?}" - ); - continue; + + let exact_words_fst = self.search_query.index.exact_words(rtxn)?; + if exact_words_fst.map_or(false, |fst| fst.contains(query)) { + let key = + FacetGroupKey { field_id: fid, level: 0, left_bound: query.as_ref() }; + if let Some(FacetGroupValue { bitmap, .. }) = + index.facet_id_string_docids.get(rtxn, &key)? + { + let count = search_candidates.intersection_len(&bitmap); + if count != 0 { + result.push(FacetValueHit { value: query.to_string(), count }); } - }; - let count = search_candidates.intersection_len(&docids); - if count != 0 { - result.push(FacetValueHit { value: value.to_string(), count }); - length += 1; } - if length >= MAX_NUMBER_OF_FACETS { - break; + } else { + let is_prefix = true; + let starts = StartsWith(Str::new(get_first(query))); + let first = + Intersection(build_dfa(query, 1, is_prefix), Complement(&starts)); + let second_dfa = build_dfa(query, 2, is_prefix); + let second = Intersection(&second_dfa, &starts); + let automaton = Union(first, &second); + + let mut stream = fst.search(automaton).into_stream(); + let mut length = 0; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => { + error!( + "the facet value is missing from the facet database: {key:?}" + ); + continue; + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + result.push(FacetValueHit { value: value.to_string(), count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; + } } } From f35ad96afac62460f451963fdc5ab84e0da23b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 27 Apr 2023 17:01:18 +0200 Subject: [PATCH 16/38] Use the disableOnAttributes parameter on the facet-search route --- milli/src/search/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 2b02b1bf8..97efb94a1 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -293,7 +293,11 @@ impl<'a> SearchForFacetValues<'a> { match self.query.as_ref() { Some(query) => { - if self.search_query.index.authorize_typos(rtxn)? { + let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; + let field_authorizes_typos = + !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); + + if authorize_typos && field_authorizes_typos { let mut result = vec![]; let exact_words_fst = self.search_query.index.exact_words(rtxn)?; From 0252cfe8b6d67b931f9c228a2e8a1ab477347569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 27 Apr 2023 17:10:58 +0200 Subject: [PATCH 17/38] Simplify the placeholder search of the facet-search route --- milli/src/search/mod.rs | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 97efb94a1..c872e3ee4 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -298,7 +298,7 @@ impl<'a> SearchForFacetValues<'a> { !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); if authorize_typos && field_authorizes_typos { - let mut result = vec![]; + let mut results = vec![]; let exact_words_fst = self.search_query.index.exact_words(rtxn)?; if exact_words_fst.map_or(false, |fst| fst.contains(query)) { @@ -309,7 +309,7 @@ impl<'a> SearchForFacetValues<'a> { { let count = search_candidates.intersection_len(&bitmap); if count != 0 { - result.push(FacetValueHit { value: query.to_string(), count }); + results.push(FacetValueHit { value: query.to_string(), count }); } } } else { @@ -337,7 +337,7 @@ impl<'a> SearchForFacetValues<'a> { }; let count = search_candidates.intersection_len(&docids); if count != 0 { - result.push(FacetValueHit { value: value.to_string(), count }); + results.push(FacetValueHit { value: value.to_string(), count }); length += 1; } if length >= MAX_NUMBER_OF_FACETS { @@ -346,11 +346,11 @@ impl<'a> SearchForFacetValues<'a> { } } - Ok(result) + Ok(results) } else { let automaton = StartsWith(Str::new(query)); let mut stream = fst.search(automaton).into_stream(); - let mut result = vec![]; + let mut results = vec![]; let mut length = 0; while let Some(facet_value) = stream.next() { let value = std::str::from_utf8(facet_value)?; @@ -366,7 +366,7 @@ impl<'a> SearchForFacetValues<'a> { }; let count = search_candidates.intersection_len(&docids); if count != 0 { - result.push(FacetValueHit { value: value.to_string(), count }); + results.push(FacetValueHit { value: value.to_string(), count }); length += 1; } if length >= MAX_NUMBER_OF_FACETS { @@ -374,34 +374,26 @@ impl<'a> SearchForFacetValues<'a> { } } - Ok(result) + Ok(results) } } None => { - let mut stream = fst.stream(); - let mut result = vec![]; + let mut results = vec![]; let mut length = 0; - while let Some(facet_value) = stream.next() { - let value = std::str::from_utf8(facet_value)?; - let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; - let docids = match index.facet_id_string_docids.get(rtxn, &key)? { - Some(FacetGroupValue { bitmap, .. }) => bitmap, - None => { - error!("the facet value is missing from the facet database: {key:?}"); - continue; - } - }; - let count = search_candidates.intersection_len(&docids); + let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; + for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { + let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = + result?; + let count = search_candidates.intersection_len(&bitmap); if count != 0 { - result.push(FacetValueHit { value: value.to_string(), count }); + results.push(FacetValueHit { value: left_bound.to_string(), count }); length += 1; } if length >= MAX_NUMBER_OF_FACETS { break; } } - - Ok(result) + Ok(results) } } } From 87e22e436aa0e032e7e72b450474fd2b6aab973d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 4 May 2023 12:27:19 +0200 Subject: [PATCH 18/38] Fix compilation issues --- meilisearch/src/routes/indexes/facet_search.rs | 14 ++++++++++++-- meilisearch/src/search.rs | 3 ++- milli/src/search/mod.rs | 18 +++++++++--------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 3633057e8..b56495c5a 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -25,7 +25,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { } // TODO improve the error messages -#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)] +#[derive(Debug, Clone, Default, PartialEq, deserr::Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct FacetSearchQuery { #[deserr(default, error = DeserrJsonError)] @@ -34,6 +34,8 @@ pub struct FacetSearchQuery { pub facet_name: String, #[deserr(default, error = DeserrJsonError)] pub q: Option, + #[deserr(default, error = DeserrJsonError)] + pub vector: Option>, #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] pub offset: usize, #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] @@ -52,6 +54,10 @@ pub struct FacetSearchQuery { pub attributes_to_highlight: Option>, #[deserr(default, error = DeserrJsonError, default)] pub show_matches_position: bool, + #[deserr(default, error = DeserrJsonError, default)] + pub show_ranking_score: bool, + #[deserr(default, error = DeserrJsonError, default)] + pub show_ranking_score_details: bool, #[deserr(default, error = DeserrJsonError)] pub filter: Option, #[deserr(default, error = DeserrJsonError)] @@ -92,8 +98,9 @@ pub async fn search( } let index = index_scheduler.index(&index_uid)?; + let features = index_scheduler.features()?; let search_result = tokio::task::spawn_blocking(move || { - perform_facet_search(&index, search_query, facet_query, facet_name) + perform_facet_search(&index, search_query, facet_query, facet_name, features) }) .await?; @@ -121,6 +128,8 @@ impl From for SearchQuery { crop_length: value.crop_length, attributes_to_highlight: value.attributes_to_highlight, show_matches_position: value.show_matches_position, + show_ranking_score: value.show_ranking_score, + show_ranking_score_details: value.show_ranking_score_details, filter: value.filter, sort: value.sort, facets: value.facets, @@ -128,6 +137,7 @@ impl From for SearchQuery { highlight_post_tag: value.highlight_post_tag, crop_marker: value.crop_marker, matching_strategy: value.matching_strategy, + vector: value.vector, } } } diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index c32724ec0..8a29f091d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -585,11 +585,12 @@ pub fn perform_facet_search( search_query: SearchQuery, facet_query: Option, facet_name: String, + features: RoFeatures, ) -> Result { let before_search = Instant::now(); let rtxn = index.read_txn()?; - let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?; + let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features)?; let mut facet_search = SearchForFacetValues::new(facet_name, search); if let Some(facet_query) = &facet_query { facet_search.query(facet_query); diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index c872e3ee4..7a09a8211 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,9 +1,9 @@ use std::fmt; -use fst::automaton::{Complement, Intersection, StartsWith, Str, Union}; -use fst::Streamer; +use fst::automaton::{Automaton, Str}; +use fst::{IntoStreamer, Streamer}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; -use log::{debug, error}; +use log::error; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; @@ -314,12 +314,12 @@ impl<'a> SearchForFacetValues<'a> { } } else { let is_prefix = true; - let starts = StartsWith(Str::new(get_first(query))); - let first = - Intersection(build_dfa(query, 1, is_prefix), Complement(&starts)); + let starts = Str::new(get_first(query)).starts_with(); + let first = build_dfa(query, 1, is_prefix) + .intersection(starts.clone().complement()); let second_dfa = build_dfa(query, 2, is_prefix); - let second = Intersection(&second_dfa, &starts); - let automaton = Union(first, &second); + let second = second_dfa.intersection(starts); + let automaton = first.union(&second); let mut stream = fst.search(automaton).into_stream(); let mut length = 0; @@ -348,7 +348,7 @@ impl<'a> SearchForFacetValues<'a> { Ok(results) } else { - let automaton = StartsWith(Str::new(query)); + let automaton = Str::new(query).starts_with(); let mut stream = fst.search(automaton).into_stream(); let mut results = vec![]; let mut length = 0; From e1b8fb48ee154575bfa0c062af588e0311200537 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 4 May 2023 15:09:17 +0200 Subject: [PATCH 19/38] Use the minWordSizeForTypos index settings --- milli/src/search/mod.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 7a09a8211..af5f3415c 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -313,13 +313,17 @@ impl<'a> SearchForFacetValues<'a> { } } } else { + let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; + let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; + let is_prefix = true; - let starts = Str::new(get_first(query)).starts_with(); - let first = build_dfa(query, 1, is_prefix) - .intersection(starts.clone().complement()); - let second_dfa = build_dfa(query, 2, is_prefix); - let second = second_dfa.intersection(starts); - let automaton = first.union(&second); + let automaton = if query.len() < one_typo as usize { + build_dfa(query, 0, is_prefix) + } else if query.len() < two_typos as usize { + build_dfa(query, 1, is_prefix) + } else { + build_dfa(query, 2, is_prefix) + }; let mut stream = fst.search(automaton).into_stream(); let mut length = 0; From ed0ff4755189381b5f94e464c2c2d625811cbcd2 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 7 Jun 2023 10:33:11 +0200 Subject: [PATCH 20/38] Return an empty list of results if attribute is set as filterable --- milli/src/search/mod.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index af5f3415c..2f0edc226 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -14,8 +14,7 @@ use crate::error::UserError; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ - execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldIdMapMissingEntry, Index, - Result, SearchContext, BEU16, + execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext, BEU16, }; // Building these factories is not free. @@ -275,13 +274,9 @@ impl<'a> SearchForFacetValues<'a> { let fields_ids_map = index.fields_ids_map(rtxn)?; let fid = match fields_ids_map.id(&self.facet) { Some(fid) => fid, - None => { - return Err(FieldIdMapMissingEntry::FieldName { - field_name: self.facet.clone(), - process: "search for facet values", - } - .into()); - } + // we return an empty list of results when the attribute has been + // set as filterable but no document contains this field (yet). + None => return Ok(Vec::new()), }; let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { From e9a3029c300fd87c7969b653a12c3c6020d17566 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 7 Jun 2023 10:52:35 +0200 Subject: [PATCH 21/38] Use the right field id to write the string facet values FST --- milli/src/update/facet/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 0752f7c37..0e6fd494c 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -169,7 +169,7 @@ impl<'i> FacetsUpdate<'i> { current_fst = match current_fst.take() { Some((fid, fst_builder)) if fid != field_id => { let fst = fst_builder.into_set(); - text_fsts.push((field_id, fst)); + text_fsts.push((fid, fst)); Some((field_id, fst::SetBuilder::memory())) } Some((field_id, fst_builder)) => Some((field_id, fst_builder)), From 41760a9306332da33406ae555142b59977f44086 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 7 Jun 2023 11:04:53 +0200 Subject: [PATCH 22/38] Introduce a new invalid_facet_search_facet_name error code --- meilisearch-types/src/error.rs | 5 ++++- milli/src/error.rs | 2 +- milli/src/search/mod.rs | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 1a87b2707..c7ab6fa0f 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -233,6 +233,7 @@ InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ; InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ; InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; InvalidSearchFilter , InvalidRequest , BAD_REQUEST ; InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ; InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ; @@ -343,7 +344,9 @@ impl ErrorCode for milli::Error { UserError::InvalidSearchableAttribute { .. } => { Code::InvalidAttributesToSearchOn } - UserError::InvalidSearchFacet { .. } => Code::InvalidSearchFacet, + UserError::InvalidFacetSearchFacetName { .. } => { + Code::InvalidFacetSearchFacetName + } UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, diff --git a/milli/src/error.rs b/milli/src/error.rs index 81be9a215..4f9a9028f 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -137,7 +137,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco ), } )] - InvalidSearchFacet { field: String, valid_fields: BTreeSet }, + InvalidFacetSearchFacetName { field: String, valid_fields: BTreeSet }, #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.", .field, .valid_fields.iter().map(AsRef::as_ref).collect::>().join(", "), diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 2f0edc226..01d854951 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -264,7 +264,7 @@ impl<'a> SearchForFacetValues<'a> { let filterable_fields = index.filterable_fields(rtxn)?; if !filterable_fields.contains(&self.facet) { - return Err(UserError::InvalidSearchFacet { + return Err(UserError::InvalidFacetSearchFacetName { field: self.facet.clone(), valid_fields: filterable_fields.into_iter().collect(), } From cb0bb399fac46c95ec51524fea9ff05c18b9787b Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 7 Jun 2023 11:14:14 +0200 Subject: [PATCH 23/38] Fix the error code returned when the facetName field is missing --- meilisearch-types/src/deserr/mod.rs | 4 ++++ meilisearch-types/src/error.rs | 1 + meilisearch/src/routes/indexes/facet_search.rs | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/meilisearch-types/src/deserr/mod.rs b/meilisearch-types/src/deserr/mod.rs index bbaa42dc0..df304cc2f 100644 --- a/meilisearch-types/src/deserr/mod.rs +++ b/meilisearch-types/src/deserr/mod.rs @@ -151,6 +151,10 @@ make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_ make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes); make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes); make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter); +make_missing_field_convenience_builder!( + MissingFacetSearchFacetName, + missing_facet_search_facet_name +); // Integrate a sub-error into a [`DeserrError`] by taking its error message but using // the default error code (C) from `Self` diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index c7ab6fa0f..004c85376 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -288,6 +288,7 @@ MissingApiKeyIndexes , InvalidRequest , BAD_REQUEST ; MissingAuthorizationHeader , Auth , UNAUTHORIZED ; MissingContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; MissingDocumentId , InvalidRequest , BAD_REQUEST ; +MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; MissingIndexUid , InvalidRequest , BAD_REQUEST ; MissingMasterKey , Auth , UNAUTHORIZED ; MissingPayload , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index b56495c5a..d07a22257 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -30,7 +30,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { pub struct FacetSearchQuery { #[deserr(default, error = DeserrJsonError)] pub facet_query: Option, - #[deserr(error = DeserrJsonError)] + #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_facet_search_facet_name)] pub facet_name: String, #[deserr(default, error = DeserrJsonError)] pub q: Option, From 6fb8af423c50f06b93ad7aa3f014c316ec8d9dd2 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 7 Jun 2023 11:20:46 +0200 Subject: [PATCH 24/38] Rename the hits and query output into facetHits and facetQuery respectively --- meilisearch/src/search.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 8a29f091d..bebf80084 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -284,8 +284,8 @@ pub struct FacetStats { #[derive(Serialize, Debug, Clone, PartialEq)] #[serde(rename_all = "camelCase")] pub struct FacetSearchResult { - pub hits: Vec, - pub query: Option, + pub facet_hits: Vec, + pub facet_query: Option, pub processing_time_ms: u128, } @@ -596,11 +596,9 @@ pub fn perform_facet_search( facet_search.query(facet_query); } - let hits = facet_search.execute()?; - Ok(FacetSearchResult { - hits, - query: facet_query, + facet_hits: facet_search.execute()?, + facet_query, processing_time_ms: before_search.elapsed().as_millis(), }) } From 904f6574bf654a5f1b46330c414062f00ca421d9 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 7 Jun 2023 17:02:41 +0200 Subject: [PATCH 25/38] Make rustfmt happy --- meilisearch/src/analytics/segment_analytics.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 4508fa26e..51bd703a6 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -527,11 +527,13 @@ impl Segment { let _ = self.batcher.push(update_documents).await; } if let Some(get_fetch_documents) = - take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET") { + take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET") + { let _ = self.batcher.push(get_fetch_documents).await; } if let Some(post_fetch_documents) = - take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST") { + take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST") + { let _ = self.batcher.push(post_fetch_documents).await; } if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") { From 09079a4e88d5f4fdd4be0a953fe403c5c86aff89 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 7 Jun 2023 17:58:05 +0200 Subject: [PATCH 26/38] Remove useless InvalidSearchFacet error --- meilisearch-types/src/error.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 004c85376..c25683e3e 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -250,7 +250,6 @@ InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSearchSort , InvalidRequest , BAD_REQUEST ; -InvalidSearchFacet , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; From 2bcd8d2983ffa6b3a473957af807391d1faa7cf3 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 12 Jun 2023 11:13:34 +0200 Subject: [PATCH 27/38] Make sure the facet queries are normalized --- milli/src/search/mod.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 01d854951..1590fac41 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -14,7 +14,8 @@ use crate::error::UserError; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ - execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext, BEU16, + execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, + SearchContext, BEU16, }; // Building these factories is not free. @@ -288,6 +289,8 @@ impl<'a> SearchForFacetValues<'a> { match self.query.as_ref() { Some(query) => { + let query = normalize_facet(query); + let query = query.as_str(); let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; let field_authorizes_typos = !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); @@ -297,8 +300,7 @@ impl<'a> SearchForFacetValues<'a> { let exact_words_fst = self.search_query.index.exact_words(rtxn)?; if exact_words_fst.map_or(false, |fst| fst.contains(query)) { - let key = - FacetGroupKey { field_id: fid, level: 0, left_bound: query.as_ref() }; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: query }; if let Some(FacetGroupValue { bitmap, .. }) = index.facet_id_string_docids.get(rtxn, &key)? { From 60ddd534390e5a80f4fde0b2dc1de542dc57eeda Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 12 Jun 2023 11:39:31 +0200 Subject: [PATCH 28/38] Return one of the original facet values when doing a facet search --- milli/src/search/mod.rs | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 1590fac41..e05562f8e 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -14,8 +14,8 @@ use crate::error::UserError; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ - execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, - SearchContext, BEU16, + execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, + Result, SearchContext, BEU16, }; // Building these factories is not free. @@ -259,6 +259,18 @@ impl<'a> SearchForFacetValues<'a> { self } + fn one_original_value_of( + &self, + field_id: FieldId, + facet_str: &str, + any_docid: DocumentId, + ) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + let key: (FieldId, _, &str) = (field_id, any_docid, facet_str); + Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned())) + } + pub fn execute(&self) -> Result> { let index = self.search_query.index; let rtxn = self.search_query.rtxn; @@ -306,7 +318,10 @@ impl<'a> SearchForFacetValues<'a> { { let count = search_candidates.intersection_len(&bitmap); if count != 0 { - results.push(FacetValueHit { value: query.to_string(), count }); + let value = self + .one_original_value_of(fid, query, bitmap.min().unwrap())? + .unwrap_or_else(|| query.to_string()); + results.push(FacetValueHit { value, count }); } } } else { @@ -338,7 +353,10 @@ impl<'a> SearchForFacetValues<'a> { }; let count = search_candidates.intersection_len(&docids); if count != 0 { - results.push(FacetValueHit { value: value.to_string(), count }); + let value = self + .one_original_value_of(fid, value, docids.min().unwrap())? + .unwrap_or_else(|| query.to_string()); + results.push(FacetValueHit { value, count }); length += 1; } if length >= MAX_NUMBER_OF_FACETS { @@ -367,7 +385,10 @@ impl<'a> SearchForFacetValues<'a> { }; let count = search_candidates.intersection_len(&docids); if count != 0 { - results.push(FacetValueHit { value: value.to_string(), count }); + let value = self + .one_original_value_of(fid, value, docids.min().unwrap())? + .unwrap_or_else(|| query.to_string()); + results.push(FacetValueHit { value, count }); length += 1; } if length >= MAX_NUMBER_OF_FACETS { @@ -387,7 +408,10 @@ impl<'a> SearchForFacetValues<'a> { result?; let count = search_candidates.intersection_len(&bitmap); if count != 0 { - results.push(FacetValueHit { value: left_bound.to_string(), count }); + let value = self + .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? + .unwrap_or_else(|| left_bound.to_string()); + results.push(FacetValueHit { value, count }); length += 1; } if length >= MAX_NUMBER_OF_FACETS { From 26f0fa678d217f3e987da2ec3bc3e801da678694 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 22 Jun 2023 14:28:32 +0200 Subject: [PATCH 29/38] Change the error message when a facet is not searchable --- milli/src/error.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 4f9a9028f..1e8e767d3 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -128,11 +128,11 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco } )] InvalidSortableAttribute { field: String, valid_fields: BTreeSet }, - #[error("Attribute `{}` is not filterable. {}", + #[error("Attribute `{}` is not facet-searchable. {}", .field, match .valid_fields.is_empty() { - true => "This index does not have configured filterable attributes.".to_string(), - false => format!("Available filterable attributes are: `{}`.", + true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(), + false => format!("Available facet-searchable attributes are: `{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.", valid_fields.iter().map(AsRef::as_ref).collect::>().join(", ") ), } From 29b40295b8440319b859f41f869f0e147d739ce6 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 22 Jun 2023 14:59:44 +0200 Subject: [PATCH 30/38] Ignore unknown facet search query parameters --- .../src/analytics/segment_analytics.rs | 43 +--------- .../src/routes/indexes/facet_search.rs | 85 +++++++------------ 2 files changed, 34 insertions(+), 94 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 51bd703a6..990daf74c 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -36,7 +36,7 @@ use crate::routes::{create_all_stats, Stats}; use crate::search::{ FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, + DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, }; use crate::Opt; @@ -960,27 +960,7 @@ pub struct FacetSearchAggregator { impl FacetSearchAggregator { pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { - let FacetSearchQuery { - facet_query: _, - facet_name, - q, - offset, - limit, - page, - hits_per_page, - attributes_to_retrieve, - attributes_to_crop, - crop_length, - attributes_to_highlight, - show_matches_position, - filter, - sort, - facets, - highlight_pre_tag, - highlight_post_tag, - crop_marker, - matching_strategy, - } = query; + let FacetSearchQuery { facet_query: _, facet_name, q, filter, matching_strategy } = query; let mut ret = Self::default(); ret.timestamp = Some(OffsetDateTime::now_utc()); @@ -989,23 +969,8 @@ impl FacetSearchAggregator { ret.user_agents = extract_user_agents(request).into_iter().collect(); ret.facet_names = Some(facet_name.clone()).into_iter().collect(); - ret.additional_search_parameters_provided = q.is_some() - || *offset != DEFAULT_SEARCH_OFFSET() - || *limit != DEFAULT_SEARCH_LIMIT() - || page.is_some() - || hits_per_page.is_some() - || attributes_to_retrieve.is_some() - || attributes_to_crop.is_some() - || *crop_length != DEFAULT_CROP_LENGTH() - || attributes_to_highlight.is_some() - || *show_matches_position - || filter.is_some() - || sort.is_some() - || facets.is_some() - || *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG() - || *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG() - || *crop_marker != DEFAULT_CROP_MARKER() - || *matching_strategy != MatchingStrategy::default(); + ret.additional_search_parameters_provided = + q.is_some() || filter.is_some() || *matching_strategy != MatchingStrategy::default(); ret } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index d07a22257..817c2a23d 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -1,5 +1,3 @@ -use std::collections::{BTreeSet, HashSet}; - use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -26,7 +24,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { // TODO improve the error messages #[derive(Debug, Clone, Default, PartialEq, deserr::Deserr)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[deserr(error = DeserrJsonError, rename_all = camelCase)] pub struct FacetSearchQuery { #[deserr(default, error = DeserrJsonError)] pub facet_query: Option, @@ -36,40 +34,8 @@ pub struct FacetSearchQuery { pub q: Option, #[deserr(default, error = DeserrJsonError)] pub vector: Option>, - #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] - pub offset: usize, - #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] - pub limit: usize, - #[deserr(default, error = DeserrJsonError)] - pub page: Option, - #[deserr(default, error = DeserrJsonError)] - pub hits_per_page: Option, - #[deserr(default, error = DeserrJsonError)] - pub attributes_to_retrieve: Option>, - #[deserr(default, error = DeserrJsonError)] - pub attributes_to_crop: Option>, - #[deserr(default, error = DeserrJsonError, default = DEFAULT_CROP_LENGTH())] - pub crop_length: usize, - #[deserr(default, error = DeserrJsonError)] - pub attributes_to_highlight: Option>, - #[deserr(default, error = DeserrJsonError, default)] - pub show_matches_position: bool, - #[deserr(default, error = DeserrJsonError, default)] - pub show_ranking_score: bool, - #[deserr(default, error = DeserrJsonError, default)] - pub show_ranking_score_details: bool, #[deserr(default, error = DeserrJsonError)] pub filter: Option, - #[deserr(default, error = DeserrJsonError)] - pub sort: Option>, - #[deserr(default, error = DeserrJsonError)] - pub facets: Option>, - #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] - pub highlight_pre_tag: String, - #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] - pub highlight_post_tag: String, - #[deserr(default, error = DeserrJsonError, default = DEFAULT_CROP_MARKER())] - pub crop_marker: String, #[deserr(default, error = DeserrJsonError, default)] pub matching_strategy: MatchingStrategy, } @@ -117,27 +83,36 @@ pub async fn search( impl From for SearchQuery { fn from(value: FacetSearchQuery) -> Self { + let FacetSearchQuery { + facet_query: _, + facet_name: _, + q, + vector, + filter, + matching_strategy, + } = value; + SearchQuery { - q: value.q, - offset: value.offset, - limit: value.limit, - page: value.page, - hits_per_page: value.hits_per_page, - attributes_to_retrieve: value.attributes_to_retrieve, - attributes_to_crop: value.attributes_to_crop, - crop_length: value.crop_length, - attributes_to_highlight: value.attributes_to_highlight, - show_matches_position: value.show_matches_position, - show_ranking_score: value.show_ranking_score, - show_ranking_score_details: value.show_ranking_score_details, - filter: value.filter, - sort: value.sort, - facets: value.facets, - highlight_pre_tag: value.highlight_pre_tag, - highlight_post_tag: value.highlight_post_tag, - crop_marker: value.crop_marker, - matching_strategy: value.matching_strategy, - vector: value.vector, + q, + offset: DEFAULT_SEARCH_OFFSET(), + limit: DEFAULT_SEARCH_LIMIT(), + page: None, + hits_per_page: None, + attributes_to_retrieve: None, + attributes_to_crop: None, + crop_length: DEFAULT_CROP_LENGTH(), + attributes_to_highlight: None, + show_matches_position: false, + show_ranking_score: false, + show_ranking_score_details: false, + filter, + sort: None, + facets: None, + highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), + highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), + crop_marker: DEFAULT_CROP_MARKER(), + matching_strategy, + vector, } } } From 63fd10aaa5c00c7a0a0f8c599a09e03fed9216f8 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 26 Jun 2023 10:40:54 +0200 Subject: [PATCH 31/38] Fix the invalid facet name field error code --- meilisearch/src/routes/indexes/facet_search.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 817c2a23d..21bc2d9c7 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -28,7 +28,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { pub struct FacetSearchQuery { #[deserr(default, error = DeserrJsonError)] pub facet_query: Option, - #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_facet_search_facet_name)] + #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_facet_search_facet_name)] pub facet_name: String, #[deserr(default, error = DeserrJsonError)] pub q: Option, From 32f2556d222fc13b48db1a84e69be8c013758870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 27 Jun 2023 17:38:24 +0200 Subject: [PATCH 32/38] Move the additional_search_parameters_provided analytic inside facets --- meilisearch/src/analytics/segment_analytics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 990daf74c..939550cb0 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -506,8 +506,8 @@ impl Segment { { let _ = self.batcher.push(post_multi_search).await; } - if let Some(post_facet_search) = take(post_facet_search_aggregator) - .into_event(&user, "Documents Searched by Facet-Search POST") + if let Some(post_facet_search) = + take(post_facet_search_aggregator).into_event(&user, "Facet Searched POST") { let _ = self.batcher.push(post_facet_search).await; } @@ -1027,8 +1027,8 @@ impl FacetSearchAggregator { }, "facets": { "total_distinct_facet_count": self.facet_names.len(), + "additional_search_parameters_provided": self.additional_search_parameters_provided, }, - "additional_search_parameters_provided": self.additional_search_parameters_provided, }); Some(Track { From 362e9ff845e97925ba03b9d11f97544e83685622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 28 Jun 2023 11:31:52 +0200 Subject: [PATCH 33/38] Add more tests --- meilisearch/tests/common/index.rs | 5 ++ meilisearch/tests/search/facet_search.rs | 92 ++++++++++++++++++++++++ meilisearch/tests/search/mod.rs | 1 + 3 files changed, 98 insertions(+) create mode 100644 meilisearch/tests/search/facet_search.rs diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 517024c74..1bb4345e8 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -377,6 +377,11 @@ impl Index<'_> { self.service.get(url).await } + pub async fn facet_search(&self, query: Value) -> (Value, StatusCode) { + let url = format!("/indexes/{}/facet-search", urlencode(self.uid.as_ref())); + self.service.post_encoded(url, query, self.encoder).await + } + pub async fn update_distinct_attribute(&self, value: Value) -> (Value, StatusCode) { let url = format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute"); diff --git a/meilisearch/tests/search/facet_search.rs b/meilisearch/tests/search/facet_search.rs new file mode 100644 index 000000000..7628f2fed --- /dev/null +++ b/meilisearch/tests/search/facet_search.rs @@ -0,0 +1,92 @@ +use once_cell::sync::Lazy; +use serde_json::{json, Value}; + +use crate::common::Server; + +pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "title": "Shazam!", + "genres": ["Action", "Adventure"], + "id": "287947", + }, + { + "title": "Captain Marvel", + "genres": ["Action", "Adventure"], + "id": "299537", + }, + { + "title": "Escape Room", + "genres": ["Horror", "Thriller", "Multiple Words"], + "id": "522681", + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "genres": ["Action", "Comedy"], + "id": "166428", + }, + { + "title": "Gläss", + "genres": ["Thriller"], + "id": "450465", + } + ]) +}); + +#[actix_rt::test] +async fn simple_facet_search() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.update_settings_filterable_attributes(json!(["genres"])).await; + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2); + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "adventure"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(response["facetHits"].as_array().unwrap().len(), 1); +} + +#[actix_rt::test] +async fn non_filterable_facet_search_error() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + assert_eq!(code, 400, "{}", response); + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "adv"})).await; + assert_eq!(code, 400, "{}", response); +} + +#[actix_rt::test] +async fn facet_search_dont_support_words() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.update_settings_filterable_attributes(json!(["genres"])).await; + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "words"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(response["facetHits"].as_array().unwrap().len(), 0); +} diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 6a55c7569..97556ae2a 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -2,6 +2,7 @@ // should be tested in its own module to isolate tests and keep the tests readable. mod errors; +mod facet_search; mod formatted; mod multi; mod pagination; From 82e1f59f1e3403c94c6454c5a7f52bb4e5e2be9c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 28 Jun 2023 15:16:04 +0200 Subject: [PATCH 34/38] Add attributes_to_search_on --- meilisearch/src/routes/indexes/facet_search.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 21bc2d9c7..5dd2ac039 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -38,6 +38,8 @@ pub struct FacetSearchQuery { pub filter: Option, #[deserr(default, error = DeserrJsonError, default)] pub matching_strategy: MatchingStrategy, + #[deserr(default, error = DeserrJsonError, default)] + pub attributes_to_search_on: Option>, } pub async fn search( @@ -90,6 +92,7 @@ impl From for SearchQuery { vector, filter, matching_strategy, + attributes_to_search_on, } = value; SearchQuery { @@ -113,6 +116,7 @@ impl From for SearchQuery { crop_marker: DEFAULT_CROP_MARKER(), matching_strategy, vector, + attributes_to_search_on, } } } From efbe7ce78bdbdb57d288caa6d2ff49d4d6c024a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 28 Jun 2023 15:36:32 +0200 Subject: [PATCH 35/38] Clean the facet string FSTs when we clear the documents --- milli/src/update/clear_documents.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 37c0f32b2..5fdf8ef49 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -34,7 +34,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { script_language_docids, facet_id_f64_docids, facet_id_string_docids, - facet_id_string_fst: _, + facet_id_string_fst, facet_id_exists_docids, facet_id_is_null_docids, facet_id_is_empty_docids, @@ -92,6 +92,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { word_prefix_fid_docids.clear(self.wtxn)?; script_language_docids.clear(self.wtxn)?; facet_id_f64_docids.clear(self.wtxn)?; + facet_id_string_fst.clear(self.wtxn)?; facet_id_exists_docids.clear(self.wtxn)?; facet_id_is_null_docids.clear(self.wtxn)?; facet_id_is_empty_docids.clear(self.wtxn)?; From 3e3f73ba1e92d1b389545512e3b799eb541d7ea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 28 Jun 2023 15:43:38 +0200 Subject: [PATCH 36/38] Fix the analytics --- meilisearch/src/analytics/segment_analytics.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 939550cb0..597fae8b2 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -969,8 +969,11 @@ impl FacetSearchAggregator { ret.user_agents = extract_user_agents(request).into_iter().collect(); ret.facet_names = Some(facet_name.clone()).into_iter().collect(); - ret.additional_search_parameters_provided = - q.is_some() || filter.is_some() || *matching_strategy != MatchingStrategy::default(); + ret.additional_search_parameters_provided = q.is_some() + || vector.is_some() + || filter.is_some() + || *matching_strategy != MatchingStrategy::default() + || attributes_to_search_on.is_some(); ret } From 605c1dd54aac6af99bf8c0331c32d26c9def4817 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 28 Jun 2023 16:41:56 +0200 Subject: [PATCH 37/38] Fix analytics --- meilisearch/src/analytics/segment_analytics.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 597fae8b2..25aa20a9a 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -960,7 +960,15 @@ pub struct FacetSearchAggregator { impl FacetSearchAggregator { pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { - let FacetSearchQuery { facet_query: _, facet_name, q, filter, matching_strategy } = query; + let FacetSearchQuery { + facet_query: _, + facet_name, + vector, + q, + filter, + matching_strategy, + attributes_to_search_on, + } = query; let mut ret = Self::default(); ret.timestamp = Some(OffsetDateTime::now_utc()); From 44b5b9e1a744574965062e85656e3917e3fc555d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 29 Jun 2023 10:28:23 +0200 Subject: [PATCH 38/38] Improve the documentation of the FacetSearchQuery struct --- meilisearch/src/routes/indexes/facet_search.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 5dd2ac039..5a5c04f99 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -22,7 +22,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(search))); } -// TODO improve the error messages +/// # Important +/// +/// Intentionally don't use `deny_unknown_fields` to ignore search parameters sent by user #[derive(Debug, Clone, Default, PartialEq, deserr::Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase)] pub struct FacetSearchQuery {