From 69c118ef766f19745ba8800c508e3a1cf686288b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 12 Mar 2024 10:35:39 +0100 Subject: [PATCH 1/8] Extract the facet order before extracting the facets values --- meilisearch/src/search.rs | 9 ++++++++- milli/src/search/mod.rs | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 27de36c6d..6e253baad 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -726,8 +726,15 @@ pub fn perform_facet_search( let rtxn = index.read_txn()?; let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?; + let sort_by = { + let sorts = index.sort_facet_values_by(&rtxn)?; + sorts + .get(&facet_name) + .copied() + .unwrap_or_else(|| sorts.get("*").copied().unwrap_or_default()) + }; let mut facet_search = - SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some()); + SearchForFacetValues::new(facet_name, search, sort_by, search_query.hybrid.is_some()); if let Some(facet_query) = &facet_query { facet_search.query(facet_query); } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index e411bd032..2b0dbe423 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -307,6 +307,7 @@ pub struct SearchForFacetValues<'a> { facet: String, search_query: Search<'a>, max_values: usize, + sort_by: OrderBy, is_hybrid: bool, } @@ -314,6 +315,7 @@ impl<'a> SearchForFacetValues<'a> { pub fn new( facet: String, search_query: Search<'a>, + sort_by: OrderBy, is_hybrid: bool, ) -> SearchForFacetValues<'a> { SearchForFacetValues { @@ -321,6 +323,7 @@ impl<'a> SearchForFacetValues<'a> { facet, search_query, max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, + sort_by, is_hybrid, } } From d3a95ea2f66ae90f62385b9b52bf39f66358cf19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 12 Mar 2024 11:01:46 +0100 Subject: [PATCH 2/8] Introduce a new OrderByMap struct to simplify the sort by usage --- meilisearch/src/search.rs | 27 ++++------------- milli/src/index.rs | 14 ++++----- milli/src/lib.rs | 1 + milli/src/order_by_map.rs | 57 ++++++++++++++++++++++++++++++++++++ milli/src/update/settings.rs | 7 +++-- 5 files changed, 73 insertions(+), 33 deletions(-) create mode 100644 milli/src/order_by_map.rs diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 6e253baad..8f3df04e0 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -671,27 +671,16 @@ pub fn perform_search( let sort_facet_values_by = index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; - let default_sort_facet_values_by = - sort_facet_values_by.get("*").copied().unwrap_or_default(); if fields.iter().all(|f| f != "*") { - let fields: Vec<_> = fields - .iter() - .map(|n| { - ( - n, - sort_facet_values_by - .get(n) - .copied() - .unwrap_or(default_sort_facet_values_by), - ) - }) - .collect(); + let fields: Vec<_> = + fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); facet_distribution.facets(fields); } + let distribution = facet_distribution .candidates(candidates) - .default_order_by(default_sort_facet_values_by) + .default_order_by(sort_facet_values_by.get("*")) .execute()?; let stats = facet_distribution.compute_stats()?; (Some(distribution), Some(stats)) @@ -726,13 +715,7 @@ pub fn perform_facet_search( let rtxn = index.read_txn()?; let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?; - let sort_by = { - let sorts = index.sort_facet_values_by(&rtxn)?; - sorts - .get(&facet_name) - .copied() - .unwrap_or_else(|| sorts.get("*").copied().unwrap_or_default()) - }; + let sort_by = index.sort_facet_values_by(&rtxn)?.get(&facet_name); let mut facet_search = SearchForFacetValues::new(facet_name, search, sort_by, search_query.hybrid.is_some()); if let Some(facet_query) = &facet_query { diff --git a/milli/src/index.rs b/milli/src/index.rs index 6ad39dcb1..2c3977403 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -20,13 +20,13 @@ use crate::heed_codec::facet::{ use crate::heed_codec::{ BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec, }; +use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::vector::EmbeddingConfig; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, - OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, - BEU32, BEU64, + Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64, }; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; @@ -1373,21 +1373,19 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::MAX_VALUES_PER_FACET) } - pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result> { - let mut orders = self + pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result { + let orders = self .main - .remap_types::>>() + .remap_types::>() .get(txn, main_key::SORT_FACET_VALUES_BY)? .unwrap_or_default(); - // Insert the default ordering if it is not already overwritten by the user. - orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic); Ok(orders) } pub(crate) fn put_sort_facet_values_by( &self, txn: &mut RwTxn, - val: &HashMap, + val: &OrderByMap, ) -> heed::Result<()> { self.main.remap_types::>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) } diff --git a/milli/src/lib.rs b/milli/src/lib.rs index f6b398304..be79a7e86 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -16,6 +16,7 @@ pub mod facet; mod fields_ids_map; pub mod heed_codec; pub mod index; +pub mod order_by_map; pub mod prompt; pub mod proximity; pub mod score_details; diff --git a/milli/src/order_by_map.rs b/milli/src/order_by_map.rs new file mode 100644 index 000000000..287e62c3a --- /dev/null +++ b/milli/src/order_by_map.rs @@ -0,0 +1,57 @@ +use std::collections::{hash_map, HashMap}; +use std::iter::FromIterator; + +use serde::{Deserialize, Deserializer, Serialize}; + +use crate::OrderBy; + +#[derive(Serialize)] +pub struct OrderByMap(HashMap); + +impl OrderByMap { + pub fn get(&self, key: impl AsRef) -> OrderBy { + self.0 + .get(key.as_ref()) + .copied() + .unwrap_or_else(|| self.0.get("*").copied().unwrap_or_default()) + } + + pub fn insert(&mut self, key: String, value: OrderBy) -> Option { + self.0.insert(key, value) + } +} + +impl Default for OrderByMap { + fn default() -> Self { + let mut map = HashMap::new(); + map.insert("*".to_string(), OrderBy::Lexicographic); + OrderByMap(map) + } +} + +impl FromIterator<(String, OrderBy)> for OrderByMap { + fn from_iter>(iter: T) -> Self { + OrderByMap(iter.into_iter().collect()) + } +} + +impl IntoIterator for OrderByMap { + type Item = (String, OrderBy); + type IntoIter = hash_map::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl<'de> Deserialize<'de> for OrderByMap { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let mut map = Deserialize::deserialize(deserializer).map(OrderByMap)?; + // Insert the default ordering if it is not already overwritten by the user. + map.0.entry("*".to_string()).or_insert(OrderBy::default()); + Ok(map) + } +} diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 3cad79467..dcf41970e 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -14,12 +14,13 @@ use super::IndexerConfig; use crate::criterion::Criterion; use crate::error::UserError; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; +use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings}; use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; -use crate::{FieldsIdsMap, Index, OrderBy, Result}; +use crate::{FieldsIdsMap, Index, Result}; #[derive(Debug, Clone, PartialEq, Eq, Copy)] pub enum Setting { @@ -145,7 +146,7 @@ pub struct Settings<'a, 't, 'i> { /// Attributes on which typo tolerance is disabled. exact_attributes: Setting>, max_values_per_facet: Setting, - sort_facet_values_by: Setting>, + sort_facet_values_by: Setting, pagination_max_total_hits: Setting, proximity_precision: Setting, embedder_settings: Setting>>, @@ -340,7 +341,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.max_values_per_facet = Setting::Reset; } - pub fn set_sort_facet_values_by(&mut self, value: HashMap) { + pub fn set_sort_facet_values_by(&mut self, value: OrderByMap) { self.sort_facet_values_by = Setting::Set(value); } From 9f7a4fbfeb4d5acaa6fef10e27753cb1530256e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 12 Mar 2024 18:04:38 +0100 Subject: [PATCH 3/8] Return the facets of a placeholder facet-search sorted by count --- meilisearch/src/search.rs | 3 +- milli/src/search/mod.rs | 80 +++++++++++++++++++++++++++++++-------- 2 files changed, 65 insertions(+), 18 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 8f3df04e0..d98e96e87 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -715,9 +715,8 @@ pub fn perform_facet_search( let rtxn = index.read_txn()?; let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?; - let sort_by = index.sort_facet_values_by(&rtxn)?.get(&facet_name); let mut facet_search = - SearchForFacetValues::new(facet_name, search, sort_by, search_query.hybrid.is_some()); + SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some()); if let Some(facet_query) = &facet_query { facet_search.query(facet_query); } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 2b0dbe423..fbd76613e 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,3 +1,5 @@ +use std::cmp::{Ordering, Reverse}; +use std::collections::BinaryHeap; use std::fmt; use std::ops::ControlFlow; @@ -307,7 +309,6 @@ pub struct SearchForFacetValues<'a> { facet: String, search_query: Search<'a>, max_values: usize, - sort_by: OrderBy, is_hybrid: bool, } @@ -315,7 +316,6 @@ impl<'a> SearchForFacetValues<'a> { pub fn new( facet: String, search_query: Search<'a>, - sort_by: OrderBy, is_hybrid: bool, ) -> SearchForFacetValues<'a> { SearchForFacetValues { @@ -323,7 +323,6 @@ impl<'a> SearchForFacetValues<'a> { facet, search_query, max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, - sort_by, is_hybrid, } } @@ -384,6 +383,8 @@ impl<'a> SearchForFacetValues<'a> { .search_query .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?; + let sort_by = index.sort_facet_values_by(rtxn)?.get(&self.facet); + match self.query.as_ref() { Some(query) => { let options = NormalizerOption { lossy: true, ..Default::default() }; @@ -465,23 +466,56 @@ impl<'a> SearchForFacetValues<'a> { } } None => { - let mut results = vec![]; let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; - for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { - let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = - result?; - let count = search_candidates.intersection_len(&bitmap); - if count != 0 { - let value = self - .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? - .unwrap_or_else(|| left_bound.to_string()); - results.push(FacetValueHit { value, count }); + + match sort_by { + OrderBy::Lexicographic => { + let mut results = vec![]; + for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { + let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = + result?; + let count = search_candidates.intersection_len(&bitmap); + if count != 0 { + let value = self + .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? + .unwrap_or_else(|| left_bound.to_string()); + results.push(FacetValueHit { value, count }); + } + if results.len() >= self.max_values { + break; + } + } + Ok(results) } - if results.len() >= self.max_values { - break; + OrderBy::Count => { + let mut top_counts = BinaryHeap::new(); + for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { + let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = + result?; + let count = search_candidates.intersection_len(&bitmap); + if count != 0 { + let value = self + .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? + .unwrap_or_else(|| left_bound.to_string()); + if top_counts.len() >= self.max_values { + top_counts.pop(); + } + // It is a max heap and we need to move the smallest counts at the + // top to be able to pop them when we reach the max_values limit. + top_counts.push(Reverse(FacetValueHit { value, count })); + } + } + + // Convert the heap into a vec of hits by removing the Reverse wrapper. + // Hits are already in the right order as they were reversed and there + // are output in ascending order. + Ok(top_counts + .into_sorted_vec() + .into_iter() + .map(|Reverse(hit)| hit) + .collect()) } } - Ok(results) } } } @@ -539,6 +573,20 @@ pub struct FacetValueHit { pub count: u64, } +impl PartialOrd for FacetValueHit { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for FacetValueHit { + fn cmp(&self, other: &Self) -> Ordering { + self.count.cmp(&other.count).then_with(|| self.value.cmp(&other.value)) + } +} + +impl Eq for FacetValueHit {} + #[cfg(test)] mod test { #[allow(unused_imports)] From 306b25ad3a7006fd7ac3c9edbb153b1a88a06bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 13 Mar 2024 10:24:21 +0100 Subject: [PATCH 4/8] Move the searchForFacetValues struct into a dedicated module --- milli/src/lib.rs | 6 +- milli/src/search/facet/mod.rs | 3 + milli/src/search/facet/search.rs | 300 +++++++++++++++++++++++++++++++ milli/src/search/mod.rs | 300 +------------------------------ 4 files changed, 308 insertions(+), 301 deletions(-) create mode 100644 milli/src/search/facet/search.rs diff --git a/milli/src/lib.rs b/milli/src/lib.rs index be79a7e86..5effcea3d 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -57,10 +57,10 @@ pub use self::heed_codec::{ UncheckedU8StrStrCodec, }; pub use self::index::Index; +pub use self::search::facet::{FacetValueHit, SearchForFacetValues}; pub use self::search::{ - FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder, - MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy, - DEFAULT_VALUES_PER_FACET, + FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy, + Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, }; pub type Result = std::result::Result; diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index f676ee109..34a9cdcb8 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -6,15 +6,18 @@ use roaring::RoaringBitmap; pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::filter::{BadGeoError, Filter}; +pub use self::search::{FacetValueHit, SearchForFacetValues}; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; use crate::heed_codec::BytesRefCodec; use crate::{Index, Result}; + mod facet_distribution; mod facet_distribution_iter; mod facet_range_search; mod facet_sort_ascending; mod facet_sort_descending; mod filter; +mod search; fn facet_extreme_value<'t>( mut extreme_it: impl Iterator> + 't, diff --git a/milli/src/search/facet/search.rs b/milli/src/search/facet/search.rs new file mode 100644 index 000000000..39bb74ace --- /dev/null +++ b/milli/src/search/facet/search.rs @@ -0,0 +1,300 @@ +use std::cmp::{Ordering, Reverse}; +use std::collections::BinaryHeap; +use std::ops::ControlFlow; + +use charabia::normalizer::NormalizerOption; +use charabia::Normalize; +use fst::automaton::{Automaton, Str}; +use fst::{IntoStreamer, Streamer}; +use roaring::RoaringBitmap; +use tracing::error; + +use crate::error::UserError; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; +use crate::search::build_dfa; +use crate::{DocumentId, FieldId, OrderBy, Result, Search}; + +/// The maximum number of values per facet returned by the facet search route. +const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100; + +pub struct SearchForFacetValues<'a> { + query: Option, + facet: String, + search_query: Search<'a>, + max_values: usize, + is_hybrid: bool, +} + +impl<'a> SearchForFacetValues<'a> { + pub fn new( + facet: String, + search_query: Search<'a>, + is_hybrid: bool, + ) -> SearchForFacetValues<'a> { + SearchForFacetValues { + query: None, + facet, + search_query, + max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, + is_hybrid, + } + } + + pub fn query(&mut self, query: impl Into) -> &mut Self { + self.query = Some(query.into()); + self + } + + pub fn max_values(&mut self, max: usize) -> &mut Self { + self.max_values = max; + self + } + + fn one_original_value_of( + &self, + field_id: FieldId, + facet_str: &str, + any_docid: DocumentId, + ) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + let key: (FieldId, _, &str) = (field_id, any_docid, facet_str); + Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned())) + } + + pub fn execute(&self) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + + let filterable_fields = index.filterable_fields(rtxn)?; + if !filterable_fields.contains(&self.facet) { + let (valid_fields, hidden_fields) = + index.remove_hidden_fields(rtxn, filterable_fields)?; + + return Err(UserError::InvalidFacetSearchFacetName { + field: self.facet.clone(), + valid_fields, + hidden_fields, + } + .into()); + } + + let fields_ids_map = index.fields_ids_map(rtxn)?; + let fid = match fields_ids_map.id(&self.facet) { + Some(fid) => fid, + // we return an empty list of results when the attribute has been + // set as filterable but no document contains this field (yet). + None => return Ok(Vec::new()), + }; + + let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { + Some(fst) => fst, + None => return Ok(vec![]), + }; + + let search_candidates = self + .search_query + .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?; + + let sort_by = index.sort_facet_values_by(rtxn)?.get(&self.facet); + + match self.query.as_ref() { + Some(query) => { + let options = NormalizerOption { lossy: true, ..Default::default() }; + let query = query.normalize(&options); + let query = query.as_ref(); + + let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; + let field_authorizes_typos = + !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); + + if authorize_typos && field_authorizes_typos { + let exact_words_fst = self.search_query.index.exact_words(rtxn)?; + if exact_words_fst.map_or(false, |fst| fst.contains(query)) { + let mut results = vec![]; + if fst.contains(query) { + self.fetch_original_facets_using_normalized( + fid, + query, + query, + &search_candidates, + &mut results, + )?; + } + Ok(results) + } else { + let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; + let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; + + let is_prefix = true; + let automaton = if query.len() < one_typo as usize { + build_dfa(query, 0, is_prefix) + } else if query.len() < two_typos as usize { + build_dfa(query, 1, is_prefix) + } else { + build_dfa(query, 2, is_prefix) + }; + + let mut stream = fst.search(automaton).into_stream(); + let mut results = vec![]; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + if self + .fetch_original_facets_using_normalized( + fid, + value, + query, + &search_candidates, + &mut results, + )? + .is_break() + { + break; + } + } + + Ok(results) + } + } else { + let automaton = Str::new(query).starts_with(); + let mut stream = fst.search(automaton).into_stream(); + let mut results = vec![]; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + if self + .fetch_original_facets_using_normalized( + fid, + value, + query, + &search_candidates, + &mut results, + )? + .is_break() + { + break; + } + } + + Ok(results) + } + } + None => { + let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; + match sort_by { + OrderBy::Lexicographic => { + let mut results = vec![]; + for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { + let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = + result?; + let count = search_candidates.intersection_len(&bitmap); + if count != 0 { + let value = self + .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? + .unwrap_or_else(|| left_bound.to_string()); + results.push(FacetValueHit { value, count }); + } + if results.len() >= self.max_values { + break; + } + } + Ok(results) + } + OrderBy::Count => { + let mut top_counts = BinaryHeap::new(); + for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { + let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = + result?; + let count = search_candidates.intersection_len(&bitmap); + if count != 0 { + let value = self + .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? + .unwrap_or_else(|| left_bound.to_string()); + if top_counts.len() >= self.max_values { + top_counts.pop(); + } + // It is a max heap and we need to move the smallest counts at the + // top to be able to pop them when we reach the max_values limit. + top_counts.push(Reverse(FacetValueHit { value, count })); + } + } + + // Convert the heap into a vec of hits by removing the Reverse wrapper. + // Hits are already in the right order as they were reversed and there + // are output in ascending order. + Ok(top_counts + .into_sorted_vec() + .into_iter() + .map(|Reverse(hit)| hit) + .collect()) + } + } + } + } + } + + fn fetch_original_facets_using_normalized( + &self, + fid: FieldId, + value: &str, + query: &str, + search_candidates: &RoaringBitmap, + results: &mut Vec, + ) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + + let database = index.facet_id_normalized_string_strings; + let key = (fid, value); + let original_strings = match database.get(rtxn, &key)? { + Some(original_strings) => original_strings, + None => { + error!("the facet value is missing from the facet database: {key:?}"); + return Ok(ControlFlow::Continue(())); + } + }; + for original in original_strings { + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => { + error!("the facet value is missing from the facet database: {key:?}"); + return Ok(ControlFlow::Continue(())); + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + let value = self + .one_original_value_of(fid, &original, docids.min().unwrap())? + .unwrap_or_else(|| query.to_string()); + results.push(FacetValueHit { value, count }); + } + if results.len() >= self.max_values { + return Ok(ControlFlow::Break(())); + } + } + + Ok(ControlFlow::Continue(())) + } +} + +#[derive(Debug, Clone, serde::Serialize, PartialEq)] +pub struct FacetValueHit { + /// The original facet value + pub value: String, + /// The number of documents associated to this facet + pub count: u64, +} + +impl PartialOrd for FacetValueHit { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for FacetValueHit { + fn cmp(&self, other: &Self) -> Ordering { + self.count.cmp(&other.count).then_with(|| self.value.cmp(&other.value)) + } +} + +impl Eq for FacetValueHit {} diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index fbd76613e..dc8354486 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,27 +1,17 @@ -use std::cmp::{Ordering, Reverse}; -use std::collections::BinaryHeap; use std::fmt; -use std::ops::ControlFlow; -use charabia::normalizer::NormalizerOption; -use charabia::Normalize; -use fst::automaton::{Automaton, Str}; -use fst::{IntoStreamer, Streamer}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -use tracing::error; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::{execute_vector_search, PartialSearchResult}; -use crate::error::UserError; -use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::DistributionShift; use crate::{ - execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, - Result, SearchContext, + execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, + SearchContext, }; // Building these factories is not free. @@ -29,9 +19,6 @@ static LEVDIST0: Lazy = Lazy::new(|| LevBuilder::new(0, true)); static LEVDIST1: Lazy = Lazy::new(|| LevBuilder::new(1, true)); static LEVDIST2: Lazy = Lazy::new(|| LevBuilder::new(2, true)); -/// The maximum number of values per facet returned by the facet search route. -const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100; - pub mod facet; mod fst_utils; pub mod hybrid; @@ -304,289 +291,6 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { } } -pub struct SearchForFacetValues<'a> { - query: Option, - facet: String, - search_query: Search<'a>, - max_values: usize, - is_hybrid: bool, -} - -impl<'a> SearchForFacetValues<'a> { - pub fn new( - facet: String, - search_query: Search<'a>, - is_hybrid: bool, - ) -> SearchForFacetValues<'a> { - SearchForFacetValues { - query: None, - facet, - search_query, - max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, - is_hybrid, - } - } - - pub fn query(&mut self, query: impl Into) -> &mut Self { - self.query = Some(query.into()); - self - } - - pub fn max_values(&mut self, max: usize) -> &mut Self { - self.max_values = max; - self - } - - fn one_original_value_of( - &self, - field_id: FieldId, - facet_str: &str, - any_docid: DocumentId, - ) -> Result> { - let index = self.search_query.index; - let rtxn = self.search_query.rtxn; - let key: (FieldId, _, &str) = (field_id, any_docid, facet_str); - Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned())) - } - - pub fn execute(&self) -> Result> { - let index = self.search_query.index; - let rtxn = self.search_query.rtxn; - - let filterable_fields = index.filterable_fields(rtxn)?; - if !filterable_fields.contains(&self.facet) { - let (valid_fields, hidden_fields) = - index.remove_hidden_fields(rtxn, filterable_fields)?; - - return Err(UserError::InvalidFacetSearchFacetName { - field: self.facet.clone(), - valid_fields, - hidden_fields, - } - .into()); - } - - let fields_ids_map = index.fields_ids_map(rtxn)?; - let fid = match fields_ids_map.id(&self.facet) { - Some(fid) => fid, - // we return an empty list of results when the attribute has been - // set as filterable but no document contains this field (yet). - None => return Ok(Vec::new()), - }; - - let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { - Some(fst) => fst, - None => return Ok(vec![]), - }; - - let search_candidates = self - .search_query - .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?; - - let sort_by = index.sort_facet_values_by(rtxn)?.get(&self.facet); - - match self.query.as_ref() { - Some(query) => { - let options = NormalizerOption { lossy: true, ..Default::default() }; - let query = query.normalize(&options); - let query = query.as_ref(); - - let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; - let field_authorizes_typos = - !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); - - if authorize_typos && field_authorizes_typos { - let exact_words_fst = self.search_query.index.exact_words(rtxn)?; - if exact_words_fst.map_or(false, |fst| fst.contains(query)) { - let mut results = vec![]; - if fst.contains(query) { - self.fetch_original_facets_using_normalized( - fid, - query, - query, - &search_candidates, - &mut results, - )?; - } - Ok(results) - } else { - let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; - let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; - - let is_prefix = true; - let automaton = if query.len() < one_typo as usize { - build_dfa(query, 0, is_prefix) - } else if query.len() < two_typos as usize { - build_dfa(query, 1, is_prefix) - } else { - build_dfa(query, 2, is_prefix) - }; - - let mut stream = fst.search(automaton).into_stream(); - let mut results = vec![]; - while let Some(facet_value) = stream.next() { - let value = std::str::from_utf8(facet_value)?; - if self - .fetch_original_facets_using_normalized( - fid, - value, - query, - &search_candidates, - &mut results, - )? - .is_break() - { - break; - } - } - - Ok(results) - } - } else { - let automaton = Str::new(query).starts_with(); - let mut stream = fst.search(automaton).into_stream(); - let mut results = vec![]; - while let Some(facet_value) = stream.next() { - let value = std::str::from_utf8(facet_value)?; - if self - .fetch_original_facets_using_normalized( - fid, - value, - query, - &search_candidates, - &mut results, - )? - .is_break() - { - break; - } - } - - Ok(results) - } - } - None => { - let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; - - match sort_by { - OrderBy::Lexicographic => { - let mut results = vec![]; - for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { - let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = - result?; - let count = search_candidates.intersection_len(&bitmap); - if count != 0 { - let value = self - .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? - .unwrap_or_else(|| left_bound.to_string()); - results.push(FacetValueHit { value, count }); - } - if results.len() >= self.max_values { - break; - } - } - Ok(results) - } - OrderBy::Count => { - let mut top_counts = BinaryHeap::new(); - for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { - let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = - result?; - let count = search_candidates.intersection_len(&bitmap); - if count != 0 { - let value = self - .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? - .unwrap_or_else(|| left_bound.to_string()); - if top_counts.len() >= self.max_values { - top_counts.pop(); - } - // It is a max heap and we need to move the smallest counts at the - // top to be able to pop them when we reach the max_values limit. - top_counts.push(Reverse(FacetValueHit { value, count })); - } - } - - // Convert the heap into a vec of hits by removing the Reverse wrapper. - // Hits are already in the right order as they were reversed and there - // are output in ascending order. - Ok(top_counts - .into_sorted_vec() - .into_iter() - .map(|Reverse(hit)| hit) - .collect()) - } - } - } - } - } - - fn fetch_original_facets_using_normalized( - &self, - fid: FieldId, - value: &str, - query: &str, - search_candidates: &RoaringBitmap, - results: &mut Vec, - ) -> Result> { - let index = self.search_query.index; - let rtxn = self.search_query.rtxn; - - let database = index.facet_id_normalized_string_strings; - let key = (fid, value); - let original_strings = match database.get(rtxn, &key)? { - Some(original_strings) => original_strings, - None => { - error!("the facet value is missing from the facet database: {key:?}"); - return Ok(ControlFlow::Continue(())); - } - }; - for original in original_strings { - let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() }; - let docids = match index.facet_id_string_docids.get(rtxn, &key)? { - Some(FacetGroupValue { bitmap, .. }) => bitmap, - None => { - error!("the facet value is missing from the facet database: {key:?}"); - return Ok(ControlFlow::Continue(())); - } - }; - let count = search_candidates.intersection_len(&docids); - if count != 0 { - let value = self - .one_original_value_of(fid, &original, docids.min().unwrap())? - .unwrap_or_else(|| query.to_string()); - results.push(FacetValueHit { value, count }); - } - if results.len() >= self.max_values { - return Ok(ControlFlow::Break(())); - } - } - - Ok(ControlFlow::Continue(())) - } -} - -#[derive(Debug, Clone, serde::Serialize, PartialEq)] -pub struct FacetValueHit { - /// The original facet value - pub value: String, - /// The number of documents associated to this facet - pub count: u64, -} - -impl PartialOrd for FacetValueHit { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for FacetValueHit { - fn cmp(&self, other: &Self) -> Ordering { - self.count.cmp(&other.count).then_with(|| self.value.cmp(&other.value)) - } -} - -impl Eq for FacetValueHit {} - #[cfg(test)] mod test { #[allow(unused_imports)] From b918b55c6b7e7856eee3e7359d70bcf4124cce4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 13 Mar 2024 11:06:13 +0100 Subject: [PATCH 5/8] Introduce a new facet value collection wrapper to simply the usage --- milli/src/search/facet/search.rs | 67 ++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/milli/src/search/facet/search.rs b/milli/src/search/facet/search.rs index 39bb74ace..504221837 100644 --- a/milli/src/search/facet/search.rs +++ b/milli/src/search/facet/search.rs @@ -6,6 +6,8 @@ use charabia::normalizer::NormalizerOption; use charabia::Normalize; use fst::automaton::{Automaton, Str}; use fst::{IntoStreamer, Streamer}; +use futures::stream::PeekMut; +use itertools::concat; use roaring::RoaringBitmap; use tracing::error; @@ -298,3 +300,68 @@ impl Ord for FacetValueHit { } impl Eq for FacetValueHit {} + +/// A wrapper type that collects the best facet values by +/// lexicographic or number of associated values. +enum ValuesCollection { + /// Keeps the top values according to the lexicographic order. + Lexicographic { max: usize, content: Vec }, + /// Keeps the top values according to the number of values associated to them. + /// + /// Note that it is a max heap and we need to move the smallest counts + /// at the top to be able to pop them when we reach the max_values limit. + Count { max: usize, content: BinaryHeap> }, +} + +impl ValuesCollection { + pub fn new_lexicographic(max: usize) -> Self { + ValuesCollection::Lexicographic { max, content: Vec::with_capacity(max) } + } + + pub fn new_count(max: usize) -> Self { + ValuesCollection::Count { max, content: BinaryHeap::with_capacity(max) } + } + + pub fn insert(&mut self, value: FacetValueHit) -> ControlFlow<()> { + match self { + ValuesCollection::Lexicographic { max, content } => { + if content.len() < *max { + content.push(value); + if content.len() < *max { + return ControlFlow::Continue(()); + } + } + ControlFlow::Break(()) + } + ValuesCollection::Count { max, content } => { + if content.len() == *max { + // Peeking gives us the worst value in the list as + // this is a max-heap and we reversed it. + let Some(mut peek) = content.peek_mut() else { return ControlFlow::Break(()) }; + if peek.0.count <= value.count { + // Replace the current worst value in the heap + // with the new one we received that is better. + *peek = Reverse(value); + } + } else { + content.push(Reverse(value)); + } + ControlFlow::Continue(()) + } + } + } + + /// Returns the list of facet values in descending order of, either, + /// count or lexicographic order of the value depending on the type. + pub fn into_sorted_vec(self) -> Vec { + match self { + ValuesCollection::Lexicographic { content, .. } => content.into_iter().collect(), + ValuesCollection::Count { content, .. } => { + // Convert the heap into a vec of hits by removing the Reverse wrapper. + // Hits are already in the right order as they were reversed and there + // are output in ascending order. + content.into_sorted_vec().into_iter().map(|Reverse(hit)| hit).collect() + } + } + } +} From e0dac5a22f5af7bda383ee144981d2567689280a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 13 Mar 2024 11:13:46 +0100 Subject: [PATCH 6/8] Simplify the algorithm by using the new facet values collection wrapper --- milli/src/search/facet/search.rs | 85 +++++++++----------------------- 1 file changed, 22 insertions(+), 63 deletions(-) diff --git a/milli/src/search/facet/search.rs b/milli/src/search/facet/search.rs index 504221837..a917bcf7c 100644 --- a/milli/src/search/facet/search.rs +++ b/milli/src/search/facet/search.rs @@ -6,8 +6,6 @@ use charabia::normalizer::NormalizerOption; use charabia::Normalize; use fst::automaton::{Automaton, Str}; use fst::{IntoStreamer, Streamer}; -use futures::stream::PeekMut; -use itertools::concat; use roaring::RoaringBitmap; use tracing::error; @@ -98,7 +96,10 @@ impl<'a> SearchForFacetValues<'a> { .search_query .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?; - let sort_by = index.sort_facet_values_by(rtxn)?.get(&self.facet); + let mut results = match index.sort_facet_values_by(rtxn)?.get(&self.facet) { + OrderBy::Lexicographic => ValuesCollection::by_lexicographic(self.max_values), + OrderBy::Count => ValuesCollection::by_count(self.max_values), + }; match self.query.as_ref() { Some(query) => { @@ -113,7 +114,6 @@ impl<'a> SearchForFacetValues<'a> { if authorize_typos && field_authorizes_typos { let exact_words_fst = self.search_query.index.exact_words(rtxn)?; if exact_words_fst.map_or(false, |fst| fst.contains(query)) { - let mut results = vec![]; if fst.contains(query) { self.fetch_original_facets_using_normalized( fid, @@ -123,7 +123,6 @@ impl<'a> SearchForFacetValues<'a> { &mut results, )?; } - Ok(results) } else { let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; @@ -138,7 +137,6 @@ impl<'a> SearchForFacetValues<'a> { }; let mut stream = fst.search(automaton).into_stream(); - let mut results = vec![]; while let Some(facet_value) = stream.next() { let value = std::str::from_utf8(facet_value)?; if self @@ -154,13 +152,10 @@ impl<'a> SearchForFacetValues<'a> { break; } } - - Ok(results) } } else { let automaton = Str::new(query).starts_with(); let mut stream = fst.search(automaton).into_stream(); - let mut results = vec![]; while let Some(facet_value) = stream.next() { let value = std::str::from_utf8(facet_value)?; if self @@ -176,62 +171,27 @@ impl<'a> SearchForFacetValues<'a> { break; } } - - Ok(results) } } None => { let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; - match sort_by { - OrderBy::Lexicographic => { - let mut results = vec![]; - for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { - let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = - result?; - let count = search_candidates.intersection_len(&bitmap); - if count != 0 { - let value = self - .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? - .unwrap_or_else(|| left_bound.to_string()); - results.push(FacetValueHit { value, count }); - } - if results.len() >= self.max_values { - break; - } + for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { + let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = + result?; + let count = search_candidates.intersection_len(&bitmap); + if count != 0 { + let value = self + .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? + .unwrap_or_else(|| left_bound.to_string()); + if results.insert(FacetValueHit { value, count }).is_break() { + break; } - Ok(results) - } - OrderBy::Count => { - let mut top_counts = BinaryHeap::new(); - for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { - let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = - result?; - let count = search_candidates.intersection_len(&bitmap); - if count != 0 { - let value = self - .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? - .unwrap_or_else(|| left_bound.to_string()); - if top_counts.len() >= self.max_values { - top_counts.pop(); - } - // It is a max heap and we need to move the smallest counts at the - // top to be able to pop them when we reach the max_values limit. - top_counts.push(Reverse(FacetValueHit { value, count })); - } - } - - // Convert the heap into a vec of hits by removing the Reverse wrapper. - // Hits are already in the right order as they were reversed and there - // are output in ascending order. - Ok(top_counts - .into_sorted_vec() - .into_iter() - .map(|Reverse(hit)| hit) - .collect()) } } } } + + Ok(results.into_sorted_vec()) } fn fetch_original_facets_using_normalized( @@ -240,7 +200,7 @@ impl<'a> SearchForFacetValues<'a> { value: &str, query: &str, search_candidates: &RoaringBitmap, - results: &mut Vec, + results: &mut ValuesCollection, ) -> Result> { let index = self.search_query.index; let rtxn = self.search_query.rtxn; @@ -268,10 +228,9 @@ impl<'a> SearchForFacetValues<'a> { let value = self .one_original_value_of(fid, &original, docids.min().unwrap())? .unwrap_or_else(|| query.to_string()); - results.push(FacetValueHit { value, count }); - } - if results.len() >= self.max_values { - return Ok(ControlFlow::Break(())); + if results.insert(FacetValueHit { value, count }).is_break() { + break; + } } } @@ -314,11 +273,11 @@ enum ValuesCollection { } impl ValuesCollection { - pub fn new_lexicographic(max: usize) -> Self { + pub fn by_lexicographic(max: usize) -> Self { ValuesCollection::Lexicographic { max, content: Vec::with_capacity(max) } } - pub fn new_count(max: usize) -> Self { + pub fn by_count(max: usize) -> Self { ValuesCollection::Count { max, content: BinaryHeap::with_capacity(max) } } From 6c9823d7bba6aaac736a26586c775ce0e3a0d4e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 13 Mar 2024 11:57:55 +0100 Subject: [PATCH 7/8] Add tests to sortFacetValuesBy count --- meilisearch/tests/search/facet_search.rs | 43 ++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/meilisearch/tests/search/facet_search.rs b/meilisearch/tests/search/facet_search.rs index 5f9f631f9..12d2226a9 100644 --- a/meilisearch/tests/search/facet_search.rs +++ b/meilisearch/tests/search/facet_search.rs @@ -123,6 +123,28 @@ async fn simple_facet_search_with_max_values() { assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1); } +#[actix_rt::test] +async fn simple_facet_search_by_count_with_max_values() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index + .update_settings_faceting( + json!({ "maxValuesPerFacet": 1, "sortFacetValuesBy": { "*": "count" } }), + ) + .await; + index.update_settings_filterable_attributes(json!(["genres"])).await; + index.add_documents(documents, None).await; + index.wait_task(2).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1); +} + #[actix_rt::test] async fn non_filterable_facet_search_error() { let server = Server::new().await; @@ -157,3 +179,24 @@ async fn facet_search_dont_support_words() { assert_eq!(code, 200, "{}", response); assert_eq!(response["facetHits"].as_array().unwrap().len(), 0); } + +#[actix_rt::test] +async fn simple_facet_search_with_sort_by_count() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.update_settings_faceting(json!({ "sortFacetValuesBy": { "*": "count" } })).await; + index.update_settings_filterable_attributes(json!(["genres"])).await; + index.add_documents(documents, None).await; + index.wait_task(2).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + + assert_eq!(code, 200, "{}", response); + let hits = response["facetHits"].as_array().unwrap(); + assert_eq!(hits.len(), 2); + assert_eq!(hits[0], json!({ "value": "Action", "count": 3 })); + assert_eq!(hits[1], json!({ "value": "Adventure", "count": 2 })); +} From f3fc2bd01fadfab8fcfc5c8b7d0802e235fa2781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 13 Mar 2024 15:22:14 +0100 Subject: [PATCH 8/8] Address some issues with preallocations --- milli/src/search/facet/search.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/milli/src/search/facet/search.rs b/milli/src/search/facet/search.rs index a917bcf7c..0251d6b8d 100644 --- a/milli/src/search/facet/search.rs +++ b/milli/src/search/facet/search.rs @@ -89,7 +89,7 @@ impl<'a> SearchForFacetValues<'a> { let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { Some(fst) => fst, - None => return Ok(vec![]), + None => return Ok(Vec::new()), }; let search_candidates = self @@ -274,11 +274,11 @@ enum ValuesCollection { impl ValuesCollection { pub fn by_lexicographic(max: usize) -> Self { - ValuesCollection::Lexicographic { max, content: Vec::with_capacity(max) } + ValuesCollection::Lexicographic { max, content: Vec::new() } } pub fn by_count(max: usize) -> Self { - ValuesCollection::Count { max, content: BinaryHeap::with_capacity(max) } + ValuesCollection::Count { max, content: BinaryHeap::new() } } pub fn insert(&mut self, value: FacetValueHit) -> ControlFlow<()> {