From 84f8938f3396afd4688319a5e72fc0d63838a341 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 25 May 2023 10:57:08 +0200 Subject: [PATCH 01/19] Rename facet distribution to be explicit on the order to find them --- milli/src/search/facet/facet_distribution.rs | 4 ++-- .../search/facet/facet_distribution_iter.rs | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index f5f32fecf..45ce13a28 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -136,7 +136,7 @@ impl<'a> FacetDistribution<'a> { candidates: &RoaringBitmap, distribution: &mut BTreeMap, ) -> heed::Result<()> { - facet_distribution_iter::iterate_over_facet_distribution( + facet_distribution_iter::lexicographically_iterate_over_facet_distribution( self.rtxn, self.index .facet_id_f64_docids @@ -161,7 +161,7 @@ impl<'a> FacetDistribution<'a> { candidates: &RoaringBitmap, distribution: &mut BTreeMap, ) -> heed::Result<()> { - facet_distribution_iter::iterate_over_facet_distribution( + facet_distribution_iter::lexicographically_iterate_over_facet_distribution( self.rtxn, self.index .facet_id_string_docids diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index d355b981a..ac315b0d3 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -19,7 +19,7 @@ use crate::DocumentId; /// /// The return value of the closure is a `ControlFlow<()>` which indicates whether we should /// keep iterating over the different facet values or stop. -pub fn iterate_over_facet_distribution<'t, CB>( +pub fn lexicographically_iterate_over_facet_distribution<'t, CB>( rtxn: &'t heed::RoTxn<'t>, db: heed::Database, FacetGroupValueCodec>, field_id: u16, @@ -29,7 +29,7 @@ pub fn iterate_over_facet_distribution<'t, CB>( where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { - let mut fd = FacetDistribution { rtxn, db, field_id, callback }; + let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback }; let highest_level = get_highest_level( rtxn, db.remap_key_type::>(), @@ -44,7 +44,7 @@ where } } -struct FacetDistribution<'t, CB> +struct LexicographicFacetDistribution<'t, CB> where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { @@ -54,7 +54,7 @@ where callback: CB, } -impl<'t, CB> FacetDistribution<'t, CB> +impl<'t, CB> LexicographicFacetDistribution<'t, CB> where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { @@ -86,6 +86,7 @@ where } Ok(ControlFlow::Continue(())) } + fn iterate( &mut self, candidates: &RoaringBitmap, @@ -116,7 +117,7 @@ where value.size as usize, )?; match cf { - ControlFlow::Continue(_) => {} + ControlFlow::Continue(_) => (), ControlFlow::Break(_) => return Ok(ControlFlow::Break(())), } } @@ -132,7 +133,7 @@ mod tests { use heed::BytesDecode; use roaring::RoaringBitmap; - use super::iterate_over_facet_distribution; + use super::lexicographically_iterate_over_facet_distribution; use crate::heed_codec::facet::OrderedF64Codec; use crate::milli_snap; use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; @@ -144,7 +145,7 @@ mod tests { let txn = index.env.read_txn().unwrap(); let candidates = (0..=255).collect::(); let mut results = String::new(); - iterate_over_facet_distribution( + lexicographically_iterate_over_facet_distribution( &txn, index.content, 0, @@ -161,6 +162,7 @@ mod tests { txn.commit().unwrap(); } } + #[test] fn filter_distribution_all_stop_early() { let indexes = [get_simple_index(), get_random_looking_index()]; @@ -169,7 +171,7 @@ mod tests { let candidates = (0..=255).collect::(); let mut results = String::new(); let mut nbr_facets = 0; - iterate_over_facet_distribution( + lexicographically_iterate_over_facet_distribution( &txn, index.content, 0, From bd3c0264066608961d6392b435129a37f3de71ce Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 25 May 2023 12:28:26 +0200 Subject: [PATCH 02/19] First to-test version of the algorithm --- .../search/facet/facet_distribution_iter.rs | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index ac315b0d3..0e1efaa0e 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -1,3 +1,5 @@ +use std::cmp::Reverse; +use std::collections::{BTreeMap, BinaryHeap}; use std::ops::ControlFlow; use heed::Result; @@ -44,6 +46,96 @@ where } } +pub fn count_iterate_over_facet_distribution<'t, CB>( + rtxn: &'t heed::RoTxn<'t>, + db: heed::Database, FacetGroupValueCodec>, + field_id: u16, + candidates: &RoaringBitmap, +) -> Result> +where + CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, +{ + #[derive(Debug, PartialOrd, Ord, PartialEq, Eq)] + struct LevelEntry<'t> { + /// The number of candidates in this entry. + count: u64, + /// The key level of the entry. + level: Reverse, + /// The left bound key. + left_bound: &'t [u8], + /// The number of keys we must look for after `left_bound`. + group_size: u8, + } + + // Represents the list of keys that we must explore. + let mut heap = BinaryHeap::new(); + let mut results = Vec::new(); + + let highest_level = get_highest_level( + rtxn, + db.remap_key_type::>(), + field_id, + )?; + + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + // We first fill the heap with values from the highest level + let starting_key = + FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; + for el in db.range(rtxn, &(&starting_key..)).unwrap().take(usize::MAX) { + let (key, value) = el.unwrap(); + // The range is unbounded on the right and the group size for the highest level is MAX, + // so we need to check that we are not iterating over the next field id + if key.field_id != field_id { + break; + } + let count = value.bitmap.intersection_len(&candidates); + if count != 0 { + heap.push(LevelEntry { + count, + level: Reverse(key.level), + left_bound: key.left_bound, + group_size: value.size, + }); + } + } + + while let Some(LevelEntry { count, level, left_bound, group_size }) = heap.pop() { + if let Reverse(0) = level { + results.push((count, left_bound)); + // TODO better just call the user callback and ask for a ControlFlow + if results.len() == 20 { + break; + } + } else { + let starting_key = + FacetGroupKey { field_id, level: level.0 - 1, left_bound: left_bound }; + for el in db.range(rtxn, &(&starting_key..)).unwrap().take(group_size as usize) { + let (key, value) = el.unwrap(); + // The range is unbounded on the right and the group size for the highest level is MAX, + // so we need to check that we are not iterating over the next field id + if key.field_id != field_id { + break; + } + let count = value.bitmap.intersection_len(&candidates); + if count != 0 { + heap.push(LevelEntry { + count, + level: Reverse(key.level), + left_bound: key.left_bound, + group_size: value.size, + }); + } + } + } + } + + Ok(results) + } else { + Ok(Default::default()) + } +} + +/// Iterate over the facets values by lexicographic order. struct LexicographicFacetDistribution<'t, CB> where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, From f42bef2f663fe3d3899aa43888686758111d3c4a Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 29 May 2023 11:52:57 +0200 Subject: [PATCH 03/19] Make the search to always return the facets ordered by count --- milli/src/search/facet/facet_distribution.rs | 75 ++++++++++--------- .../search/facet/facet_distribution_iter.rs | 24 +++--- 2 files changed, 55 insertions(+), 44 deletions(-) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 45ce13a28..f6ef51ccd 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -240,42 +240,49 @@ impl<'a> FacetDistribution<'a> { } fn facet_values(&self, field_id: FieldId) -> heed::Result> { - use FacetType::{Number, String}; + // use FacetType::{Number, String}; - match self.candidates { - Some(ref candidates) => { - // Classic search, candidates were specified, we must return facet values only related - // to those candidates. We also enter here for facet strings for performance reasons. - let mut distribution = BTreeMap::new(); - if candidates.len() <= CANDIDATES_THRESHOLD { - self.facet_distribution_from_documents( - field_id, - Number, - candidates, - &mut distribution, - )?; - self.facet_distribution_from_documents( - field_id, - String, - candidates, - &mut distribution, - )?; - } else { - self.facet_numbers_distribution_from_facet_levels( - field_id, - candidates, - &mut distribution, - )?; - self.facet_strings_distribution_from_facet_levels( - field_id, - candidates, - &mut distribution, - )?; - } - Ok(distribution) - } - None => self.facet_values_from_raw_facet_database(field_id), + let candidates = match self.candidates.as_ref() { + Some(candidates) => candidates.clone(), + None => todo!("fetch candidates"), + }; + + let mut distribution = BTreeMap::new(); + + let number_distribution = facet_distribution_iter::count_iterate_over_facet_distribution( + self.rtxn, + self.index + .facet_id_f64_docids + .remap_key_type::>(), + field_id, + &candidates, + )?; + + for (count, facet_key, _) in number_distribution { + let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap(); + distribution.insert(facet_key.to_string(), count); } + + let string_distribution = facet_distribution_iter::count_iterate_over_facet_distribution( + self.rtxn, + self.index + .facet_id_string_docids + .remap_key_type::>(), + field_id, + &candidates, + )?; + + for (count, facet_key, any_docid) in string_distribution { + let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap(); + + let key: (FieldId, _, &str) = (field_id, any_docid, facet_key); + let original_string = + self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned(); + + distribution.insert(original_string, count); + } + + Ok(distribution) } pub fn compute_stats(&self) -> Result> { diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 0e1efaa0e..acd936eff 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -1,5 +1,5 @@ use std::cmp::Reverse; -use std::collections::{BTreeMap, BinaryHeap}; +use std::collections::BinaryHeap; use std::ops::ControlFlow; use heed::Result; @@ -46,15 +46,12 @@ where } } -pub fn count_iterate_over_facet_distribution<'t, CB>( +pub fn count_iterate_over_facet_distribution<'t>( rtxn: &'t heed::RoTxn<'t>, db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: &RoaringBitmap, -) -> Result> -where - CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, -{ +) -> Result> { #[derive(Debug, PartialOrd, Ord, PartialEq, Eq)] struct LevelEntry<'t> { /// The number of candidates in this entry. @@ -65,6 +62,8 @@ where left_bound: &'t [u8], /// The number of keys we must look for after `left_bound`. group_size: u8, + /// Any docid in the set of matching documents. Used to find the original facet string. + any_docid: u32, } // Represents the list of keys that we must explore. @@ -88,20 +87,23 @@ where if key.field_id != field_id { break; } - let count = value.bitmap.intersection_len(&candidates); + let intersection = value.bitmap & candidates; + let count = intersection.len(); if count != 0 { heap.push(LevelEntry { count, level: Reverse(key.level), left_bound: key.left_bound, group_size: value.size, + any_docid: intersection.min().unwrap(), }); } } - while let Some(LevelEntry { count, level, left_bound, group_size }) = heap.pop() { + while let Some(LevelEntry { count, level, left_bound, group_size, any_docid }) = heap.pop() + { if let Reverse(0) = level { - results.push((count, left_bound)); + results.push((count, left_bound, any_docid)); // TODO better just call the user callback and ask for a ControlFlow if results.len() == 20 { break; @@ -116,13 +118,15 @@ where if key.field_id != field_id { break; } - let count = value.bitmap.intersection_len(&candidates); + let intersection = value.bitmap & candidates; + let count = intersection.len(); if count != 0 { heap.push(LevelEntry { count, level: Reverse(key.level), left_bound: key.left_bound, group_size: value.size, + any_docid: intersection.min().unwrap(), }); } } From 80bbd4b6f3999e5b1e361bba63ebd7dbd888bf71 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 29 May 2023 13:39:06 +0200 Subject: [PATCH 04/19] Clean and make the facet order configurable internally --- milli/src/search/facet/facet_distribution.rs | 169 ++++++++---------- .../search/facet/facet_distribution_iter.rs | 23 ++- 2 files changed, 86 insertions(+), 106 deletions(-) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index f6ef51ccd..3d277013a 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -9,12 +9,14 @@ use roaring::RoaringBitmap; use crate::error::UserError; use crate::facet::FacetType; use crate::heed_codec::facet::{ - FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, - OrderedF64Codec, + FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, }; use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; use crate::search::facet::facet_distribution_iter; use crate::{FieldId, Index, Result}; +use facet_distribution_iter::{ + count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, +}; /// The default number of values by facets that will /// be fetched from the key-value store. @@ -24,10 +26,20 @@ pub const DEFAULT_VALUES_PER_FACET: usize = 100; /// the system to choose between one algorithm or another. const CANDIDATES_THRESHOLD: u64 = 3000; +/// How should we fetch the facets? +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum OrderBy { + /// By lexicographic order... + Lexicographic, + /// Or by number of docids in common? + Count, +} + pub struct FacetDistribution<'a> { facets: Option>, candidates: Option, max_values_per_facet: usize, + order_by: OrderBy, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, } @@ -38,6 +50,7 @@ impl<'a> FacetDistribution<'a> { facets: None, candidates: None, max_values_per_facet: DEFAULT_VALUES_PER_FACET, + order_by: OrderBy::Count, rtxn, index, } @@ -53,6 +66,11 @@ impl<'a> FacetDistribution<'a> { self } + pub fn order_by(&mut self, order_by: OrderBy) -> &mut Self { + self.order_by = order_by; + self + } + pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self { self.candidates = Some(candidates); self @@ -134,9 +152,15 @@ impl<'a> FacetDistribution<'a> { &self, field_id: FieldId, candidates: &RoaringBitmap, + order_by: OrderBy, distribution: &mut BTreeMap, ) -> heed::Result<()> { - facet_distribution_iter::lexicographically_iterate_over_facet_distribution( + let search_function = match order_by { + OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution, + OrderBy::Count => count_iterate_over_facet_distribution, + }; + + search_function( self.rtxn, self.index .facet_id_f64_docids @@ -159,9 +183,15 @@ impl<'a> FacetDistribution<'a> { &self, field_id: FieldId, candidates: &RoaringBitmap, + order_by: OrderBy, distribution: &mut BTreeMap, ) -> heed::Result<()> { - facet_distribution_iter::lexicographically_iterate_over_facet_distribution( + let search_function = match order_by { + OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution, + OrderBy::Count => count_iterate_over_facet_distribution, + }; + + search_function( self.rtxn, self.index .facet_id_string_docids @@ -189,98 +219,42 @@ impl<'a> FacetDistribution<'a> { ) } - /// Placeholder search, a.k.a. no candidates were specified. We iterate throught the - /// facet values one by one and iterate on the facet level 0 for numbers. - fn facet_values_from_raw_facet_database( - &self, - field_id: FieldId, - ) -> heed::Result> { - let mut distribution = BTreeMap::new(); - - let db = self.index.facet_id_f64_docids; - let mut prefix = vec![]; - prefix.extend_from_slice(&field_id.to_be_bytes()); - prefix.push(0); // read values from level 0 only - - let iter = db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? - .remap_types::, FacetGroupValueCodec>(); - - for result in iter { - let (key, value) = result?; - distribution.insert(key.left_bound.to_string(), value.bitmap.len()); - if distribution.len() == self.max_values_per_facet { - break; - } - } - - let iter = self - .index - .facet_id_string_docids - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? - .remap_types::, FacetGroupValueCodec>(); - - for result in iter { - let (key, value) = result?; - - let docid = value.bitmap.iter().next().unwrap(); - let key: (FieldId, _, &'a str) = (field_id, docid, key.left_bound); - let original_string = - self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned(); - - distribution.insert(original_string, value.bitmap.len()); - if distribution.len() == self.max_values_per_facet { - break; - } - } - - Ok(distribution) - } - fn facet_values(&self, field_id: FieldId) -> heed::Result> { - // use FacetType::{Number, String}; - - let candidates = match self.candidates.as_ref() { - Some(candidates) => candidates.clone(), - None => todo!("fetch candidates"), - }; + use FacetType::{Number, String}; let mut distribution = BTreeMap::new(); + match (self.order_by, &self.candidates) { + (OrderBy::Lexicographic, Some(cnd)) if cnd.len() <= CANDIDATES_THRESHOLD => { + // Classic search, candidates were specified, we must return facet values only related + // to those candidates. We also enter here for facet strings for performance reasons. + self.facet_distribution_from_documents(field_id, Number, cnd, &mut distribution)?; + self.facet_distribution_from_documents(field_id, String, cnd, &mut distribution)?; + } + _ => { + let universe; + let candidates; + match &self.candidates { + Some(cnd) => candidates = cnd, + None => { + universe = self.index.documents_ids(self.rtxn)?; + candidates = &universe; + } + } - let number_distribution = facet_distribution_iter::count_iterate_over_facet_distribution( - self.rtxn, - self.index - .facet_id_f64_docids - .remap_key_type::>(), - field_id, - &candidates, - )?; - - for (count, facet_key, _) in number_distribution { - let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap(); - distribution.insert(facet_key.to_string(), count); - } - - let string_distribution = facet_distribution_iter::count_iterate_over_facet_distribution( - self.rtxn, - self.index - .facet_id_string_docids - .remap_key_type::>(), - field_id, - &candidates, - )?; - - for (count, facet_key, any_docid) in string_distribution { - let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap(); - - let key: (FieldId, _, &str) = (field_id, any_docid, facet_key); - let original_string = - self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned(); - - distribution.insert(original_string, count); - } + self.facet_numbers_distribution_from_facet_levels( + field_id, + candidates, + self.order_by, + &mut distribution, + )?; + self.facet_strings_distribution_from_facet_levels( + field_id, + candidates, + self.order_by, + &mut distribution, + )?; + } + }; Ok(distribution) } @@ -381,13 +355,20 @@ impl<'a> FacetDistribution<'a> { impl fmt::Debug for FacetDistribution<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let FacetDistribution { facets, candidates, max_values_per_facet, rtxn: _, index: _ } = - self; + let FacetDistribution { + facets, + candidates, + max_values_per_facet, + order_by, + rtxn: _, + index: _, + } = self; f.debug_struct("FacetDistribution") .field("facets", facets) .field("candidates", candidates) .field("max_values_per_facet", max_values_per_facet) + .field("order_by", order_by) .finish() } } diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index acd936eff..9ff57b34a 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -46,12 +46,16 @@ where } } -pub fn count_iterate_over_facet_distribution<'t>( +pub fn count_iterate_over_facet_distribution<'t, CB>( rtxn: &'t heed::RoTxn<'t>, db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: &RoaringBitmap, -) -> Result> { + mut callback: CB, +) -> Result<()> +where + CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, +{ #[derive(Debug, PartialOrd, Ord, PartialEq, Eq)] struct LevelEntry<'t> { /// The number of candidates in this entry. @@ -68,8 +72,6 @@ pub fn count_iterate_over_facet_distribution<'t>( // Represents the list of keys that we must explore. let mut heap = BinaryHeap::new(); - let mut results = Vec::new(); - let highest_level = get_highest_level( rtxn, db.remap_key_type::>(), @@ -103,10 +105,9 @@ pub fn count_iterate_over_facet_distribution<'t>( while let Some(LevelEntry { count, level, left_bound, group_size, any_docid }) = heap.pop() { if let Reverse(0) = level { - results.push((count, left_bound, any_docid)); - // TODO better just call the user callback and ask for a ControlFlow - if results.len() == 20 { - break; + match (callback)(left_bound, count, any_docid)? { + ControlFlow::Continue(_) => (), + ControlFlow::Break(_) => return Ok(()), } } else { let starting_key = @@ -132,11 +133,9 @@ pub fn count_iterate_over_facet_distribution<'t>( } } } - - Ok(results) - } else { - Ok(Default::default()) } + + Ok(()) } /// Iterate over the facets values by lexicographic order. From 34b2e98fe907a0a0036f911b17db42874e3225a8 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 29 May 2023 15:32:09 +0200 Subject: [PATCH 05/19] Expose a sortFacetValuesBy parameter to the user --- meilisearch/src/routes/indexes/search.rs | 9 ++++-- meilisearch/src/search.rs | 33 ++++++++++++++++++-- milli/src/lib.rs | 2 +- milli/src/search/facet/facet_distribution.rs | 10 +++--- milli/src/search/facet/mod.rs | 2 +- milli/src/search/mod.rs | 2 +- 6 files changed, 45 insertions(+), 13 deletions(-) diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index a79f95ee4..2ee8c44f7 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -16,9 +16,9 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::search::{ - add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, - DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, - DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, + add_search_rules, perform_search, FacetValuesSort, MatchingStrategy, SearchQuery, + DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, + DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, }; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -64,6 +64,8 @@ pub struct SearchQueryGet { show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError)] facets: Option>, + #[deserr(default, error = DeserrQueryParamError)] + sort_facet_values_by: Option, #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] highlight_pre_tag: String, #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError)] @@ -103,6 +105,7 @@ impl From for SearchQuery { show_ranking_score: other.show_ranking_score.0, show_ranking_score_details: other.show_ranking_score_details.0, facets: other.facets.map(|o| o.into_iter().collect()), + sort_facet_values_by: other.sort_facet_values_by, highlight_pre_tag: other.highlight_pre_tag, highlight_post_tag: other.highlight_post_tag, crop_marker: other.crop_marker, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index bebf80084..ed2378d94 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -14,7 +14,7 @@ use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::{ - dot_product_similarity, FacetValueHit, InternalError, SearchForFacetValues, + dot_product_similarity, FacetValueHit, OrderBy, InternalError, SearchForFacetValues, }; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; @@ -74,6 +74,8 @@ pub struct SearchQuery { pub sort: Option>, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, + #[deserr(default, error = DeserrJsonError)] // TODO + pub sort_facet_values_by: Option, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] pub highlight_pre_tag: String, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] @@ -133,6 +135,8 @@ pub struct SearchQueryWithIndex { pub sort: Option>, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, + #[deserr(default, error = DeserrJsonError)] // TODO + pub sort_facet_values_by: Option, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] pub highlight_pre_tag: String, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] @@ -165,6 +169,7 @@ impl SearchQueryWithIndex { filter, sort, facets, + sort_facet_values_by, highlight_pre_tag, highlight_post_tag, crop_marker, @@ -190,6 +195,7 @@ impl SearchQueryWithIndex { filter, sort, facets, + sort_facet_values_by, highlight_pre_tag, highlight_post_tag, crop_marker, @@ -226,6 +232,26 @@ impl From for TermsMatchingStrategy { } } +#[derive(Debug, Default, Clone, PartialEq, Eq, Deserr)] +#[deserr(rename_all = camelCase)] +pub enum FacetValuesSort { + /// Facet values are sorted by decreasing count. + /// The count is the number of records containing this facet value in the results of the query. + #[default] + Alpha, + /// Facet values are sorted in alphabetical order, ascending from A to Z. + Count, +} + +impl Into for FacetValuesSort { + fn into(self) -> OrderBy { + match self { + FacetValuesSort::Alpha => OrderBy::Lexicographic, + FacetValuesSort::Count => OrderBy::Count, + } + } +} + #[derive(Debug, Clone, Serialize, PartialEq)] pub struct SearchHit { #[serde(flatten)] @@ -557,7 +583,10 @@ pub fn perform_search( if fields.iter().all(|f| f != "*") { facet_distribution.facets(fields); } - let distribution = facet_distribution.candidates(candidates).execute()?; + let distribution = facet_distribution + .candidates(candidates) + .order_by(query.sort_facet_values_by.map_or_else(Default::default, Into::into)) + .execute()?; let stats = facet_distribution.compute_stats()?; (Some(distribution), Some(stats)) } diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 4360eb38e..55b283931 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -58,7 +58,7 @@ pub use self::heed_codec::{ pub use self::index::Index; pub use self::search::{ FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder, - MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy, + MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, }; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 3d277013a..3cc970049 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -12,11 +12,10 @@ use crate::heed_codec::facet::{ FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, }; use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; -use crate::search::facet::facet_distribution_iter; -use crate::{FieldId, Index, Result}; -use facet_distribution_iter::{ +use crate::search::facet::facet_distribution_iter::{ count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, }; +use crate::{FieldId, Index, Result}; /// The default number of values by facets that will /// be fetched from the key-value store. @@ -27,9 +26,10 @@ pub const DEFAULT_VALUES_PER_FACET: usize = 100; const CANDIDATES_THRESHOLD: u64 = 3000; /// How should we fetch the facets? -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum OrderBy { /// By lexicographic order... + #[default] Lexicographic, /// Or by number of docids in common? Count, @@ -50,7 +50,7 @@ impl<'a> FacetDistribution<'a> { facets: None, candidates: None, max_values_per_facet: DEFAULT_VALUES_PER_FACET, - order_by: OrderBy::Count, + order_by: OrderBy::default(), rtxn, index, } diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 51f1bf005..ebc9e1da0 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -4,7 +4,7 @@ use heed::types::{ByteSlice, DecodeIgnore}; use heed::{BytesDecode, RoTxn}; use roaring::RoaringBitmap; -pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; +pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::filter::{BadGeoError, Filter}; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; use crate::heed_codec::ByteSliceRefCodec; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index e05562f8e..65e78caa9 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -7,7 +7,7 @@ use log::error; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET}; +pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords}; use self::new::PartialSearchResult; use crate::error::UserError; From a385642ec3ef031cad79695450f031eacf5aad48 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 29 May 2023 15:47:45 +0200 Subject: [PATCH 06/19] Replace the BTreeMap by an IndexMap to return values in order --- Cargo.lock | 1 + meilisearch/Cargo.toml | 50 ++++++++++++++++---- meilisearch/src/search.rs | 3 +- milli/Cargo.toml | 1 + milli/src/search/facet/facet_distribution.rs | 13 ++--- 5 files changed, 53 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ccf79f9a2..effdfe9a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2731,6 +2731,7 @@ dependencies = [ "grenad", "heed", "hnsw", + "indexmap", "insta", "itertools", "json-depth-checker", diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index d90dd24dd..7d6601ac5 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -14,14 +14,27 @@ default-run = "meilisearch" [dependencies] actix-cors = "0.6.4" -actix-http = { version = "3.3.1", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] } -actix-web = { version = "4.3.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] } +actix-http = { version = "3.3.1", default-features = false, features = [ + "compress-brotli", + "compress-gzip", + "rustls", +] } +actix-web = { version = "4.3.1", default-features = false, features = [ + "macros", + "compress-brotli", + "compress-gzip", + "cookies", + "rustls", +] } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } anyhow = { version = "1.0.70", features = ["backtrace"] } async-stream = "0.3.5" async-trait = "0.1.68" bstr = "1.4.0" -byte-unit = { version = "4.0.19", default-features = false, features = ["std", "serde"] } +byte-unit = { version = "4.0.19", default-features = false, features = [ + "std", + "serde", +] } bytes = "1.4.0" clap = { version = "4.2.1", features = ["derive", "env"] } crossbeam-channel = "0.5.8" @@ -57,7 +70,10 @@ prometheus = { version = "0.13.3", features = ["process"] } rand = "0.8.5" rayon = "1.7.0" regex = "1.7.3" -reqwest = { version = "0.11.16", features = ["rustls-tls", "json"], default-features = false } +reqwest = { version = "0.11.16", features = [ + "rustls-tls", + "json", +], default-features = false } rustls = "0.20.8" rustls-pemfile = "1.0.2" segment = { version = "0.2.2", optional = true } @@ -71,7 +87,12 @@ sysinfo = "0.28.4" tar = "0.4.38" tempfile = "3.5.0" thiserror = "1.0.40" -time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] } +time = { version = "0.3.20", features = [ + "serde-well-known", + "formatting", + "parsing", + "macros", +] } tokio = { version = "1.27.0", features = ["full"] } tokio-stream = "0.1.12" toml = "0.7.3" @@ -90,7 +111,7 @@ brotli = "3.3.4" insta = "1.29.0" manifest-dir-macros = "0.1.16" maplit = "1.0.2" -meili-snap = {path = "../meili-snap"} +meili-snap = { path = "../meili-snap" } temp-env = "0.3.3" urlencoding = "2.1.2" yaup = "0.2.1" @@ -99,7 +120,10 @@ yaup = "0.2.1" anyhow = { version = "1.0.70", optional = true } cargo_toml = { version = "0.15.2", optional = true } hex = { version = "0.4.3", optional = true } -reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false, optional = true } +reqwest = { version = "0.11.16", features = [ + "blocking", + "rustls-tls", +], default-features = false, optional = true } sha-1 = { version = "0.10.1", optional = true } static-files = { version = "0.2.3", optional = true } tempfile = { version = "3.5.0", optional = true } @@ -109,7 +133,17 @@ zip = { version = "0.6.4", optional = true } [features] default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] analytics = ["segment"] -mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"] +mini-dashboard = [ + "actix-web-static-files", + "static-files", + "anyhow", + "cargo_toml", + "hex", + "reqwest", + "sha-1", + "tempfile", + "zip", +] chinese = ["meilisearch-types/chinese"] hebrew = ["meilisearch-types/hebrew"] japanese = ["meilisearch-types/japanese"] diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index ed2378d94..21a2a4312 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -6,6 +6,7 @@ use std::time::Instant; use deserr::Deserr; use either::Either; use index_scheduler::RoFeatures; +use indexmap::IndexMap; use log::warn; use meilisearch_auth::IndexSearchRules; use meilisearch_types::deserr::DeserrJsonError; @@ -279,7 +280,7 @@ pub struct SearchResult { #[serde(flatten)] pub hits_info: HitsInfo, #[serde(skip_serializing_if = "Option::is_none")] - pub facet_distribution: Option>>, + pub facet_distribution: Option>>, #[serde(skip_serializing_if = "Option::is_none")] pub facet_stats: Option>, } diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 08f0c2645..aa4b98ec2 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -34,6 +34,7 @@ heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.6", default-f "sync-read-txn", ] } hnsw = { version = "0.11.0", features = ["serde1"] } +indexmap = { version = "1.9.3", features = ["serde"] } json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } memmap2 = "0.5.10" diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 3cc970049..f49cbe1c4 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -4,6 +4,7 @@ use std::{fmt, mem}; use heed::types::ByteSlice; use heed::BytesDecode; +use indexmap::IndexMap; use roaring::RoaringBitmap; use crate::error::UserError; @@ -83,7 +84,7 @@ impl<'a> FacetDistribution<'a> { field_id: FieldId, facet_type: FacetType, candidates: &RoaringBitmap, - distribution: &mut BTreeMap, + distribution: &mut IndexMap, ) -> heed::Result<()> { match facet_type { FacetType::Number => { @@ -153,7 +154,7 @@ impl<'a> FacetDistribution<'a> { field_id: FieldId, candidates: &RoaringBitmap, order_by: OrderBy, - distribution: &mut BTreeMap, + distribution: &mut IndexMap, ) -> heed::Result<()> { let search_function = match order_by { OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution, @@ -184,7 +185,7 @@ impl<'a> FacetDistribution<'a> { field_id: FieldId, candidates: &RoaringBitmap, order_by: OrderBy, - distribution: &mut BTreeMap, + distribution: &mut IndexMap, ) -> heed::Result<()> { let search_function = match order_by { OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution, @@ -219,10 +220,10 @@ impl<'a> FacetDistribution<'a> { ) } - fn facet_values(&self, field_id: FieldId) -> heed::Result> { + fn facet_values(&self, field_id: FieldId) -> heed::Result> { use FacetType::{Number, String}; - let mut distribution = BTreeMap::new(); + let mut distribution = IndexMap::new(); match (self.order_by, &self.candidates) { (OrderBy::Lexicographic, Some(cnd)) if cnd.len() <= CANDIDATES_THRESHOLD => { // Classic search, candidates were specified, we must return facet values only related @@ -318,7 +319,7 @@ impl<'a> FacetDistribution<'a> { Ok(distribution) } - pub fn execute(&self) -> Result>> { + pub fn execute(&self) -> Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?; From d9fea0143ff3d1ac4e2f4e06838cf299f5859a65 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 29 May 2023 15:51:00 +0200 Subject: [PATCH 07/19] Make Clippy happy --- meilisearch/src/routes/indexes/search.rs | 2 +- meilisearch/src/search.rs | 12 ++++++------ milli/src/search/facet/facet_distribution_iter.rs | 3 +-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 2ee8c44f7..a25c83961 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -64,7 +64,7 @@ pub struct SearchQueryGet { show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError)] facets: Option>, - #[deserr(default, error = DeserrQueryParamError)] + #[deserr(default, error = DeserrQueryParamError)] // TODO sort_facet_values_by: Option, #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] highlight_pre_tag: String, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 21a2a4312..6d614ad7b 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -236,17 +236,17 @@ impl From for TermsMatchingStrategy { #[derive(Debug, Default, Clone, PartialEq, Eq, Deserr)] #[deserr(rename_all = camelCase)] pub enum FacetValuesSort { - /// Facet values are sorted by decreasing count. - /// The count is the number of records containing this facet value in the results of the query. + /// Facet values are sorted in alphabetical order, ascending from A to Z. #[default] Alpha, - /// Facet values are sorted in alphabetical order, ascending from A to Z. + /// Facet values are sorted by decreasing count. + /// The count is the number of records containing this facet value in the results of the query. Count, } -impl Into for FacetValuesSort { - fn into(self) -> OrderBy { - match self { +impl From for OrderBy { + fn from(val: FacetValuesSort) -> Self { + match val { FacetValuesSort::Alpha => OrderBy::Lexicographic, FacetValuesSort::Count => OrderBy::Count, } diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 9ff57b34a..475022f29 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -110,8 +110,7 @@ where ControlFlow::Break(_) => return Ok(()), } } else { - let starting_key = - FacetGroupKey { field_id, level: level.0 - 1, left_bound: left_bound }; + let starting_key = FacetGroupKey { field_id, level: level.0 - 1, left_bound }; for el in db.range(rtxn, &(&starting_key..)).unwrap().take(group_size as usize) { let (key, value) = el.unwrap(); // The range is unbounded on the right and the group size for the highest level is MAX, From 9917bf046a08cafb1c0c2ce74d86a1dcedc4f5e0 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 22 Jun 2023 17:13:40 +0200 Subject: [PATCH 08/19] Move the sortFacetValuesBy in the faceting settings --- dump/src/reader/compat/v5_to_v6.rs | 1 + meilisearch-types/src/facet_values_sort.rs | 33 ++++++ meilisearch-types/src/lib.rs | 1 + meilisearch-types/src/settings.rs | 32 +++++- meilisearch/src/routes/indexes/search.rs | 9 +- meilisearch/src/routes/indexes/settings.rs | 5 + meilisearch/src/search.rs | 25 ++-- milli/src/index.rs | 29 ++++- milli/src/search/facet/facet_distribution.rs | 113 +++++++++++-------- milli/src/update/settings.rs | 29 ++++- milli/tests/search/facet_distribution.rs | 6 +- 11 files changed, 213 insertions(+), 70 deletions(-) create mode 100644 meilisearch-types/src/facet_values_sort.rs diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index b4c851d28..ef5588d8f 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -362,6 +362,7 @@ impl From> for v6::Settings { faceting: match settings.faceting { v5::Setting::Set(faceting) => v6::Setting::Set(v6::FacetingSettings { max_values_per_facet: faceting.max_values_per_facet.into(), + sort_facet_values_by: v6::Setting::NotSet, }), v5::Setting::Reset => v6::Setting::Reset, v5::Setting::NotSet => v6::Setting::NotSet, diff --git a/meilisearch-types/src/facet_values_sort.rs b/meilisearch-types/src/facet_values_sort.rs new file mode 100644 index 000000000..278061f19 --- /dev/null +++ b/meilisearch-types/src/facet_values_sort.rs @@ -0,0 +1,33 @@ +use deserr::Deserr; +use milli::OrderBy; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Deserr)] +#[serde(rename_all = "camelCase")] +#[deserr(rename_all = camelCase)] +pub enum FacetValuesSort { + /// Facet values are sorted in alphabetical order, ascending from A to Z. + #[default] + Alpha, + /// Facet values are sorted by decreasing count. + /// The count is the number of records containing this facet value in the results of the query. + Count, +} + +impl From for OrderBy { + fn from(val: FacetValuesSort) -> Self { + match val { + FacetValuesSort::Alpha => OrderBy::Lexicographic, + FacetValuesSort::Count => OrderBy::Count, + } + } +} + +impl From for FacetValuesSort { + fn from(val: OrderBy) -> Self { + match val { + OrderBy::Lexicographic => FacetValuesSort::Alpha, + OrderBy::Count => FacetValuesSort::Count, + } + } +} diff --git a/meilisearch-types/src/lib.rs b/meilisearch-types/src/lib.rs index dbdec14fc..b0762563a 100644 --- a/meilisearch-types/src/lib.rs +++ b/meilisearch-types/src/lib.rs @@ -2,6 +2,7 @@ pub mod compression; pub mod deserr; pub mod document_formats; pub mod error; +pub mod facet_values_sort; pub mod features; pub mod index_uid; pub mod index_uid_pattern; diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index 0fc7ea6e2..fa47a3dd7 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -14,6 +14,7 @@ use serde::{Deserialize, Serialize, Serializer}; use crate::deserr::DeserrJsonError; use crate::error::deserr_codes::*; +use crate::facet_values_sort::FacetValuesSort; /// The maximimum number of results that the engine /// will be able to return in one search call. @@ -102,6 +103,9 @@ pub struct FacetingSettings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] pub max_values_per_facet: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + pub sort_facet_values_by: Setting>, } #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] @@ -398,12 +402,21 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.faceting { - Setting::Set(ref value) => match value.max_values_per_facet { - Setting::Set(val) => builder.set_max_values_per_facet(val), - Setting::Reset => builder.reset_max_values_per_facet(), - Setting::NotSet => (), - }, + match &settings.faceting { + Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => { + match max_values_per_facet { + Setting::Set(val) => builder.set_max_values_per_facet(*val), + Setting::Reset => builder.reset_max_values_per_facet(), + Setting::NotSet => (), + } + match sort_facet_values_by { + Setting::Set(val) => builder.set_sort_facet_values_by( + val.iter().map(|(name, order)| (name.clone(), (*order).into())).collect(), + ), + Setting::Reset => builder.reset_sort_facet_values_by(), + Setting::NotSet => (), + } + } Setting::Reset => builder.reset_max_values_per_facet(), Setting::NotSet => (), } @@ -476,6 +489,13 @@ pub fn settings( max_values_per_facet: Setting::Set( index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET), ), + sort_facet_values_by: Setting::Set( + index + .sort_facet_values_by(rtxn)? + .into_iter() + .map(|(name, sort)| (name, sort.into())) + .collect(), + ), }; let pagination = PaginationSettings { diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index a25c83961..a79f95ee4 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -16,9 +16,9 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::search::{ - add_search_rules, perform_search, FacetValuesSort, MatchingStrategy, SearchQuery, - DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, + add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, + DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, + DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, }; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -64,8 +64,6 @@ pub struct SearchQueryGet { show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError)] facets: Option>, - #[deserr(default, error = DeserrQueryParamError)] // TODO - sort_facet_values_by: Option, #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] highlight_pre_tag: String, #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError)] @@ -105,7 +103,6 @@ impl From for SearchQuery { show_ranking_score: other.show_ranking_score.0, show_ranking_score_details: other.show_ranking_score_details.0, facets: other.facets.map(|o| o.into_iter().collect()), - sort_facet_values_by: other.sort_facet_values_by, highlight_pre_tag: other.highlight_pre_tag, highlight_post_tag: other.highlight_post_tag, crop_marker: other.crop_marker, diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 4b6cde685..7035093eb 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -407,6 +407,7 @@ make_setting_route!( json!({ "faceting": { "max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()), + "sort_facet_values_by": setting.as_ref().and_then(|s| s.sort_facet_values_by.clone().set()), }, }), Some(req), @@ -545,6 +546,10 @@ pub async fn update_all( .as_ref() .set() .and_then(|s| s.max_values_per_facet.as_ref().set()), + "sort_facet_values_by": new_settings.faceting + .as_ref() + .set() + .and_then(|s| s.sort_facet_values_by.as_ref().set()), }, "pagination": { "max_total_hits": new_settings.pagination diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 6d614ad7b..5c45e030d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -75,8 +75,6 @@ pub struct SearchQuery { pub sort: Option>, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, - #[deserr(default, error = DeserrJsonError)] // TODO - pub sort_facet_values_by: Option, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] pub highlight_pre_tag: String, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] @@ -136,8 +134,6 @@ pub struct SearchQueryWithIndex { pub sort: Option>, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, - #[deserr(default, error = DeserrJsonError)] // TODO - pub sort_facet_values_by: Option, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] pub highlight_pre_tag: String, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] @@ -170,7 +166,6 @@ impl SearchQueryWithIndex { filter, sort, facets, - sort_facet_values_by, highlight_pre_tag, highlight_post_tag, crop_marker, @@ -196,7 +191,6 @@ impl SearchQueryWithIndex { filter, sort, facets, - sort_facet_values_by, highlight_pre_tag, highlight_post_tag, crop_marker, @@ -581,12 +575,29 @@ pub fn perform_search( .unwrap_or(DEFAULT_VALUES_PER_FACET); facet_distribution.max_values_per_facet(max_values_by_facet); + let sort_facet_values_by = + index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; + let default_sort_facet_values_by = + sort_facet_values_by.get("*").copied().unwrap_or_default(); + if fields.iter().all(|f| f != "*") { + let fields: Vec<_> = fields + .into_iter() + .map(|n| { + ( + n, + sort_facet_values_by + .get(n) + .copied() + .unwrap_or(default_sort_facet_values_by), + ) + }) + .collect(); facet_distribution.facets(fields); } let distribution = facet_distribution .candidates(candidates) - .order_by(query.sort_facet_values_by.map_or_else(Default::default, Into::into)) + .default_order_by(default_sort_facet_values_by) .execute()?; let stats = facet_distribution.compute_stats()?; (Some(distribution), Some(stats)) diff --git a/milli/src/index.rs b/milli/src/index.rs index 5c32f75f5..392ed1705 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -26,7 +26,8 @@ use crate::readable_slices::ReadableSlices; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, - Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, + OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, + BEU32, }; /// The HNSW data-structure that we serialize, fill and search in. @@ -71,6 +72,7 @@ pub mod main_key { pub const EXACT_WORDS: &str = "exact-words"; pub const EXACT_ATTRIBUTES: &str = "exact-attributes"; pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet"; + pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by"; pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits"; } @@ -1298,6 +1300,31 @@ impl Index { self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET) } + pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result> { + let mut orders = self + .main + .get::<_, Str, SerdeJson>>( + txn, + main_key::SORT_FACET_VALUES_BY, + )? + .unwrap_or_default(); + // Insert the default ordering if it is not already overwritten by the user. + orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic); + Ok(orders) + } + + pub(crate) fn put_sort_facet_values_by( + &self, + txn: &mut RwTxn, + val: &HashMap, + ) -> heed::Result<()> { + self.main.put::<_, Str, SerdeJson<_>>(txn, main_key::SORT_FACET_VALUES_BY, &val) + } + + pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(txn, main_key::SORT_FACET_VALUES_BY) + } + pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result> { self.main.get::<_, Str, OwnedType>(txn, main_key::PAGINATION_MAX_TOTAL_HITS) } diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index f49cbe1c4..fa6cc2fa0 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::ops::ControlFlow; use std::{fmt, mem}; @@ -6,6 +6,7 @@ use heed::types::ByteSlice; use heed::BytesDecode; use indexmap::IndexMap; use roaring::RoaringBitmap; +use serde::{Deserialize, Serialize}; use crate::error::UserError; use crate::facet::FacetType; @@ -27,7 +28,7 @@ pub const DEFAULT_VALUES_PER_FACET: usize = 100; const CANDIDATES_THRESHOLD: u64 = 3000; /// How should we fetch the facets? -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub enum OrderBy { /// By lexicographic order... #[default] @@ -37,10 +38,10 @@ pub enum OrderBy { } pub struct FacetDistribution<'a> { - facets: Option>, + facets: Option>, candidates: Option, max_values_per_facet: usize, - order_by: OrderBy, + default_order_by: OrderBy, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, } @@ -51,14 +52,22 @@ impl<'a> FacetDistribution<'a> { facets: None, candidates: None, max_values_per_facet: DEFAULT_VALUES_PER_FACET, - order_by: OrderBy::default(), + default_order_by: OrderBy::default(), rtxn, index, } } - pub fn facets, A: AsRef>(&mut self, names: I) -> &mut Self { - self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect()); + pub fn facets, A: AsRef>( + &mut self, + names_ordered_by: I, + ) -> &mut Self { + self.facets = Some( + names_ordered_by + .into_iter() + .map(|(name, order_by)| (name.as_ref().to_string(), order_by)) + .collect(), + ); self } @@ -67,8 +76,8 @@ impl<'a> FacetDistribution<'a> { self } - pub fn order_by(&mut self, order_by: OrderBy) -> &mut Self { - self.order_by = order_by; + pub fn default_order_by(&mut self, order_by: OrderBy) -> &mut Self { + self.default_order_by = order_by; self } @@ -220,11 +229,15 @@ impl<'a> FacetDistribution<'a> { ) } - fn facet_values(&self, field_id: FieldId) -> heed::Result> { + fn facet_values( + &self, + field_id: FieldId, + order_by: OrderBy, + ) -> heed::Result> { use FacetType::{Number, String}; let mut distribution = IndexMap::new(); - match (self.order_by, &self.candidates) { + match (order_by, &self.candidates) { (OrderBy::Lexicographic, Some(cnd)) if cnd.len() <= CANDIDATES_THRESHOLD => { // Classic search, candidates were specified, we must return facet values only related // to those candidates. We also enter here for facet strings for performance reasons. @@ -245,13 +258,13 @@ impl<'a> FacetDistribution<'a> { self.facet_numbers_distribution_from_facet_levels( field_id, candidates, - self.order_by, + order_by, &mut distribution, )?; self.facet_strings_distribution_from_facet_levels( field_id, candidates, - self.order_by, + order_by, &mut distribution, )?; } @@ -273,6 +286,7 @@ impl<'a> FacetDistribution<'a> { Some(facets) => { let invalid_fields: HashSet<_> = facets .iter() + .map(|(name, _)| name) .filter(|facet| !crate::is_faceted(facet, &filterable_fields)) .collect(); if !invalid_fields.is_empty() { @@ -282,7 +296,7 @@ impl<'a> FacetDistribution<'a> { } .into()); } else { - facets.clone() + facets.into_iter().map(|(name, _)| name).cloned().collect() } } None => filterable_fields, @@ -327,6 +341,7 @@ impl<'a> FacetDistribution<'a> { Some(ref facets) => { let invalid_fields: HashSet<_> = facets .iter() + .map(|(name, _)| name) .filter(|facet| !crate::is_faceted(facet, &filterable_fields)) .collect(); if !invalid_fields.is_empty() { @@ -336,7 +351,7 @@ impl<'a> FacetDistribution<'a> { } .into()); } else { - facets.clone() + facets.into_iter().map(|(name, _)| name).cloned().collect() } } None => filterable_fields, @@ -345,7 +360,13 @@ impl<'a> FacetDistribution<'a> { let mut distribution = BTreeMap::new(); for (fid, name) in fields_ids_map.iter() { if crate::is_faceted(name, &fields) { - let values = self.facet_values(fid)?; + let order_by = self + .facets + .as_ref() + .map(|facets| facets.get(name).copied()) + .flatten() + .unwrap_or(self.default_order_by); + let values = self.facet_values(fid, order_by)?; distribution.insert(name.to_string(), values); } } @@ -360,7 +381,7 @@ impl fmt::Debug for FacetDistribution<'_> { facets, candidates, max_values_per_facet, - order_by, + default_order_by, rtxn: _, index: _, } = self; @@ -369,7 +390,7 @@ impl fmt::Debug for FacetDistribution<'_> { .field("facets", facets) .field("candidates", candidates) .field("max_values_per_facet", max_values_per_facet) - .field("order_by", order_by) + .field("default_order_by", default_order_by) .finish() } } @@ -381,7 +402,7 @@ mod tests { use crate::documents::documents_batch_reader_from_objects; use crate::index::tests::TempIndex; - use crate::{milli_snap, FacetDistribution}; + use crate::{milli_snap, FacetDistribution, OrderBy}; #[test] fn few_candidates_few_facet_values() { @@ -406,14 +427,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .execute() .unwrap(); milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates([0, 1, 2].iter().copied().collect()) .execute() .unwrap(); @@ -421,7 +442,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates([1, 2].iter().copied().collect()) .execute() .unwrap(); @@ -432,7 +453,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {" blue": 1, "RED": 1}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates([2].iter().copied().collect()) .execute() .unwrap(); @@ -440,7 +461,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"RED": 1}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates([0, 1, 2].iter().copied().collect()) .max_values_per_facet(1) .execute() @@ -478,14 +499,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .execute() .unwrap(); milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .max_values_per_facet(1) .execute() .unwrap(); @@ -493,7 +514,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..10_000).collect()) .execute() .unwrap(); @@ -501,7 +522,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..5_000).collect()) .execute() .unwrap(); @@ -509,7 +530,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..5_000).collect()) .execute() .unwrap(); @@ -517,7 +538,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..5_000).collect()) .max_values_per_facet(1) .execute() @@ -555,14 +576,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .execute() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"ac9229ed5964d893af96a7076e2f8af5"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .max_values_per_facet(2) .execute() .unwrap(); @@ -570,7 +591,7 @@ mod tests { milli_snap!(format!("{map:?}"), "no_candidates_with_max_2", @r###"{"colour": {"0": 10, "1": 10}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..10_000).collect()) .execute() .unwrap(); @@ -578,7 +599,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_10_000", @"ac9229ed5964d893af96a7076e2f8af5"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..5_000).collect()) .execute() .unwrap(); @@ -615,14 +636,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .compute_stats() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..1000).collect()) .compute_stats() .unwrap(); @@ -630,7 +651,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((217..777).collect()) .compute_stats() .unwrap(); @@ -667,14 +688,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .compute_stats() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..1000).collect()) .compute_stats() .unwrap(); @@ -682,7 +703,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((217..777).collect()) .compute_stats() .unwrap(); @@ -719,14 +740,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .compute_stats() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..1000).collect()) .compute_stats() .unwrap(); @@ -734,7 +755,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((217..777).collect()) .compute_stats() .unwrap(); @@ -775,14 +796,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .compute_stats() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((0..1000).collect()) .compute_stats() .unwrap(); @@ -790,7 +811,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once("colour")) + .facets(std::iter::once(("colour", OrderBy::default()))) .candidates((217..777).collect()) .compute_stats() .unwrap(); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 3e271924b..aa69abca1 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -14,7 +14,7 @@ use crate::error::UserError; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; -use crate::{FieldsIdsMap, Index, Result}; +use crate::{FieldsIdsMap, Index, OrderBy, Result}; #[derive(Debug, Clone, PartialEq, Eq, Copy)] pub enum Setting { @@ -122,6 +122,7 @@ pub struct Settings<'a, 't, 'u, 'i> { /// Attributes on which typo tolerance is disabled. exact_attributes: Setting>, max_values_per_facet: Setting, + sort_facet_values_by: Setting>, pagination_max_total_hits: Setting, } @@ -149,6 +150,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { min_word_len_one_typo: Setting::NotSet, exact_attributes: Setting::NotSet, max_values_per_facet: Setting::NotSet, + sort_facet_values_by: Setting::NotSet, pagination_max_total_hits: Setting::NotSet, indexer_config, } @@ -275,6 +277,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.max_values_per_facet = Setting::Reset; } + pub fn set_sort_facet_values_by(&mut self, value: HashMap) { + self.sort_facet_values_by = Setting::Set(value); + } + + pub fn reset_sort_facet_values_by(&mut self) { + self.sort_facet_values_by = Setting::Reset; + } + pub fn set_pagination_max_total_hits(&mut self, value: usize) { self.pagination_max_total_hits = Setting::Set(value); } @@ -680,6 +690,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } + fn update_sort_facet_values_by(&mut self) -> Result<()> { + match self.sort_facet_values_by.as_ref() { + Setting::Set(value) => { + self.index.put_sort_facet_values_by(self.wtxn, value)?; + } + Setting::Reset => { + self.index.delete_sort_facet_values_by(self.wtxn)?; + } + Setting::NotSet => (), + } + + Ok(()) + } + fn update_pagination_max_total_hits(&mut self) -> Result<()> { match self.pagination_max_total_hits { Setting::Set(max) => { @@ -714,6 +738,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.update_min_typo_word_len()?; self.update_exact_words()?; self.update_max_values_per_facet()?; + self.update_sort_facet_values_by()?; self.update_pagination_max_total_hits()?; // If there is new faceted fields we indicate that we must reindex as we must @@ -1515,6 +1540,7 @@ mod tests { exact_words, exact_attributes, max_values_per_facet, + sort_facet_values_by, pagination_max_total_hits, } = settings; assert!(matches!(searchable_fields, Setting::NotSet)); @@ -1532,6 +1558,7 @@ mod tests { assert!(matches!(exact_words, Setting::NotSet)); assert!(matches!(exact_attributes, Setting::NotSet)); assert!(matches!(max_values_per_facet, Setting::NotSet)); + assert!(matches!(sort_facet_values_by, Setting::NotSet)); assert!(matches!(pagination_max_total_hits, Setting::NotSet)); }) .unwrap(); diff --git a/milli/tests/search/facet_distribution.rs b/milli/tests/search/facet_distribution.rs index e2f89f2db..03405531d 100644 --- a/milli/tests/search/facet_distribution.rs +++ b/milli/tests/search/facet_distribution.rs @@ -5,7 +5,7 @@ use heed::EnvOpenOptions; use maplit::hashset; use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; -use milli::{FacetDistribution, Index, Object}; +use milli::{FacetDistribution, Index, Object, OrderBy}; use serde_json::Deserializer; #[test] @@ -63,12 +63,12 @@ fn test_facet_distribution_with_no_facet_values() { let txn = index.read_txn().unwrap(); let mut distrib = FacetDistribution::new(&txn, &index); - distrib.facets(vec!["genres"]); + distrib.facets(vec![("genres", OrderBy::default())]); let result = distrib.execute().unwrap(); assert_eq!(result["genres"].len(), 0); let mut distrib = FacetDistribution::new(&txn, &index); - distrib.facets(vec!["tags"]); + distrib.facets(vec![("tags", OrderBy::default())]); let result = distrib.execute().unwrap(); assert_eq!(result["tags"].len(), 2); } From b132e859f72c1f4ec26897017c7d6ed73ebc9646 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 22 Jun 2023 18:26:38 +0200 Subject: [PATCH 09/19] Make clippy happy --- meilisearch/src/search.rs | 2 +- milli/src/search/facet/facet_distribution.rs | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 5c45e030d..239fc5c8d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -582,7 +582,7 @@ pub fn perform_search( if fields.iter().all(|f| f != "*") { let fields: Vec<_> = fields - .into_iter() + .iter() .map(|n| { ( n, diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index fa6cc2fa0..cf922f646 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -296,7 +296,7 @@ impl<'a> FacetDistribution<'a> { } .into()); } else { - facets.into_iter().map(|(name, _)| name).cloned().collect() + facets.iter().map(|(name, _)| name).cloned().collect() } } None => filterable_fields, @@ -351,7 +351,7 @@ impl<'a> FacetDistribution<'a> { } .into()); } else { - facets.into_iter().map(|(name, _)| name).cloned().collect() + facets.iter().map(|(name, _)| name).cloned().collect() } } None => filterable_fields, @@ -363,8 +363,7 @@ impl<'a> FacetDistribution<'a> { let order_by = self .facets .as_ref() - .map(|facets| facets.get(name).copied()) - .flatten() + .and_then(|facets| facets.get(name).copied()) .unwrap_or(self.default_order_by); let values = self.facet_values(fid, order_by)?; distribution.insert(name.to_string(), values); From eed9176e0cfab2d3025e112a00779785e1b693ff Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 26 Jun 2023 13:10:36 +0200 Subject: [PATCH 10/19] Also reset the sortFacetValuesBy when reseting the faceting settings --- meilisearch-types/src/settings.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index fa47a3dd7..14a337678 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -417,7 +417,10 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } } - Setting::Reset => builder.reset_max_values_per_facet(), + Setting::Reset => { + builder.reset_max_values_per_facet(); + builder.reset_sort_facet_values_by(); + } Setting::NotSet => (), } From 1d8dfafd25b254eab564c28b373a03c08ef3b879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 27 Jun 2023 14:54:49 +0200 Subject: [PATCH 11/19] Add analytics when all facets are sorted by count and the number of modified ones --- meilisearch/src/routes/indexes/settings.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 7035093eb..fb5d8ff7a 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -401,13 +401,17 @@ make_setting_route!( analytics, |setting: &Option, req: &HttpRequest| { use serde_json::json; + use meilisearch_types::facet_values_sort::FacetValuesSort; analytics.publish( "Faceting Updated".to_string(), json!({ "faceting": { "max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()), - "sort_facet_values_by": setting.as_ref().and_then(|s| s.sort_facet_values_by.clone().set()), + "sort_facet_values_by_star_count": setting.as_ref().and_then(|s| { + s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) + }), + "sort_facet_values_by_total": setting.as_ref().and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), }, }), Some(req), From 9a13b72f25520842f2286d5693616f968f0f9eb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 27 Jun 2023 15:31:11 +0200 Subject: [PATCH 12/19] Fix the tests --- meilisearch/tests/dumps/mod.rs | 24 +++++++++++----------- meilisearch/tests/settings/get_settings.rs | 6 ++++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index 31fa49b60..ce225cdf7 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -36,7 +36,7 @@ async fn import_dump_v1_movie_raw() { assert_eq!(code, 200); assert_eq!( settings, - json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; @@ -128,7 +128,7 @@ async fn import_dump_v1_movie_with_settings() { assert_eq!(code, 200); assert_eq!( settings, - json!({ "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": ["genres"], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({ "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": ["genres"], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; @@ -220,7 +220,7 @@ async fn import_dump_v1_rubygems_with_settings() { assert_eq!(code, 200); assert_eq!( settings, - json!({"displayedAttributes": ["description", "id", "name", "summary", "total_downloads", "version"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": ["version"], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 }}) + json!({"displayedAttributes": ["description", "id", "name", "summary", "total_downloads", "version"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": ["version"], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 }}) ); let (tasks, code) = index.list_tasks().await; @@ -310,7 +310,7 @@ async fn import_dump_v2_movie_raw() { assert_eq!(code, 200); assert_eq!( settings, - json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; @@ -402,7 +402,7 @@ async fn import_dump_v2_movie_with_settings() { assert_eq!(code, 200); assert_eq!( settings, - json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; @@ -494,7 +494,7 @@ async fn import_dump_v2_rubygems_with_settings() { assert_eq!(code, 200); assert_eq!( settings, - json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 }}) + json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 }}) ); let (tasks, code) = index.list_tasks().await; @@ -584,7 +584,7 @@ async fn import_dump_v3_movie_raw() { assert_eq!(code, 200); assert_eq!( settings, - json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; @@ -676,7 +676,7 @@ async fn import_dump_v3_movie_with_settings() { assert_eq!(code, 200); assert_eq!( settings, - json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; @@ -768,7 +768,7 @@ async fn import_dump_v3_rubygems_with_settings() { assert_eq!(code, 200); assert_eq!( settings, - json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; @@ -858,7 +858,7 @@ async fn import_dump_v4_movie_raw() { assert_eq!(code, 200); assert_eq!( settings, - json!({ "displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({ "displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; @@ -950,7 +950,7 @@ async fn import_dump_v4_movie_with_settings() { assert_eq!(code, 200); assert_eq!( settings, - json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; @@ -1042,7 +1042,7 @@ async fn import_dump_v4_rubygems_with_settings() { assert_eq!(code, 200); assert_eq!( settings, - json!({ "displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + json!({ "displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) ); let (tasks, code) = index.list_tasks().await; diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 88e395b57..d39cbd96e 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -21,6 +21,9 @@ static DEFAULT_SETTINGS_VALUES: Lazy> = Lazy::new(| "faceting", json!({ "maxValuesPerFacet": json!(100), + "sortFacetValuesBy": { + "*": "alpha" + } }), ); map.insert( @@ -63,6 +66,9 @@ async fn get_settings() { settings["faceting"], json!({ "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } }) ); assert_eq!( From b9518304612b6bf99ac7558ece61d2919cd2251c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 27 Jun 2023 15:42:17 +0200 Subject: [PATCH 13/19] Add more tests --- milli/src/search/facet/facet_distribution.rs | 74 +++++++++++++------- 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index cf922f646..cfaffed8c 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -396,6 +396,8 @@ impl fmt::Debug for FacetDistribution<'_> { #[cfg(test)] mod tests { + use std::iter; + use big_s::S; use maplit::hashset; @@ -426,14 +428,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .execute() .unwrap(); milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates([0, 1, 2].iter().copied().collect()) .execute() .unwrap(); @@ -441,7 +443,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates([1, 2].iter().copied().collect()) .execute() .unwrap(); @@ -452,7 +454,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {" blue": 1, "RED": 1}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates([2].iter().copied().collect()) .execute() .unwrap(); @@ -460,13 +462,22 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"RED": 1}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates([0, 1, 2].iter().copied().collect()) .max_values_per_facet(1) .execute() .unwrap(); milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(iter::once(("colour", OrderBy::Count))) + .candidates([0, 1, 2].iter().copied().collect()) + .max_values_per_facet(1) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###); } #[test] @@ -498,14 +509,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .execute() .unwrap(); milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .max_values_per_facet(1) .execute() .unwrap(); @@ -513,7 +524,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..10_000).collect()) .execute() .unwrap(); @@ -521,7 +532,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..5_000).collect()) .execute() .unwrap(); @@ -529,7 +540,7 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..5_000).collect()) .execute() .unwrap(); @@ -537,13 +548,22 @@ mod tests { milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..5_000).collect()) .max_values_per_facet(1) .execute() .unwrap(); milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(iter::once(("colour", OrderBy::Count))) + .candidates((0..5_000).collect()) + .max_values_per_facet(1) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Red": 3000}}"###); } #[test] @@ -575,14 +595,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .execute() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"ac9229ed5964d893af96a7076e2f8af5"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .max_values_per_facet(2) .execute() .unwrap(); @@ -590,7 +610,7 @@ mod tests { milli_snap!(format!("{map:?}"), "no_candidates_with_max_2", @r###"{"colour": {"0": 10, "1": 10}}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..10_000).collect()) .execute() .unwrap(); @@ -598,7 +618,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_10_000", @"ac9229ed5964d893af96a7076e2f8af5"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..5_000).collect()) .execute() .unwrap(); @@ -635,14 +655,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .compute_stats() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..1000).collect()) .compute_stats() .unwrap(); @@ -650,7 +670,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((217..777).collect()) .compute_stats() .unwrap(); @@ -687,14 +707,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .compute_stats() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..1000).collect()) .compute_stats() .unwrap(); @@ -702,7 +722,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((217..777).collect()) .compute_stats() .unwrap(); @@ -739,14 +759,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .compute_stats() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..1000).collect()) .compute_stats() .unwrap(); @@ -754,7 +774,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((217..777).collect()) .compute_stats() .unwrap(); @@ -795,14 +815,14 @@ mod tests { let txn = index.read_txn().unwrap(); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .compute_stats() .unwrap(); milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((0..1000).collect()) .compute_stats() .unwrap(); @@ -810,7 +830,7 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###); let map = FacetDistribution::new(&txn, &index) - .facets(std::iter::once(("colour", OrderBy::default()))) + .facets(iter::once(("colour", OrderBy::default()))) .candidates((217..777).collect()) .compute_stats() .unwrap(); From 3c295c1ffc97e1f3ab6e62d02c7e210f68f5e629 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 28 Jun 2023 14:23:03 +0200 Subject: [PATCH 14/19] Fix typos --- meilisearch-types/src/settings.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index 14a337678..a5fb10074 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -16,7 +16,7 @@ use crate::deserr::DeserrJsonError; use crate::error::deserr_codes::*; use crate::facet_values_sort::FacetValuesSort; -/// The maximimum number of results that the engine +/// The maximum number of results that the engine /// will be able to return in one search call. pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; From a0e0fce677fa30338d5959dc5156ce6f0a91462a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 28 Jun 2023 14:29:12 +0200 Subject: [PATCH 15/19] Simplify a Rust lifetime trick --- milli/src/search/facet/facet_distribution.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index cfaffed8c..acf117ef6 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -246,14 +246,13 @@ impl<'a> FacetDistribution<'a> { } _ => { let universe; - let candidates; - match &self.candidates { - Some(cnd) => candidates = cnd, + let candidates = match &self.candidates { + Some(cnd) => cnd, None => { universe = self.index.documents_ids(self.rtxn)?; - candidates = &universe; + &universe } - } + }; self.facet_numbers_distribution_from_facet_levels( field_id, From 0b97596c9335c16728859b4f938a1d49ed0322e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 28 Jun 2023 14:33:14 +0200 Subject: [PATCH 16/19] Replace unwraps with ? --- milli/src/search/facet/facet_distribution_iter.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 475022f29..0c37906ef 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -82,8 +82,8 @@ where // We first fill the heap with values from the highest level let starting_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; - for el in db.range(rtxn, &(&starting_key..)).unwrap().take(usize::MAX) { - let (key, value) = el.unwrap(); + for el in db.range(rtxn, &(&starting_key..))?.take(usize::MAX) { + let (key, value) = el?; // The range is unbounded on the right and the group size for the highest level is MAX, // so we need to check that we are not iterating over the next field id if key.field_id != field_id { @@ -111,8 +111,8 @@ where } } else { let starting_key = FacetGroupKey { field_id, level: level.0 - 1, left_bound }; - for el in db.range(rtxn, &(&starting_key..)).unwrap().take(group_size as usize) { - let (key, value) = el.unwrap(); + for el in db.range(rtxn, &(&starting_key..))?.take(group_size as usize) { + let (key, value) = el?; // The range is unbounded on the right and the group size for the highest level is MAX, // so we need to check that we are not iterating over the next field id if key.field_id != field_id { @@ -193,10 +193,10 @@ where } let starting_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound }; - let iter = self.db.range(self.rtxn, &(&starting_key..)).unwrap().take(group_size); + let iter = self.db.range(self.rtxn, &(&starting_key..))?.take(group_size); for el in iter { - let (key, value) = el.unwrap(); + let (key, value) = el?; // The range is unbounded on the right and the group size for the highest level is MAX, // so we need to check that we are not iterating over the next field id if key.field_id != self.field_id { From 7c157fc442d467a4ce916f0547c79e836df093e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 28 Jun 2023 14:35:36 +0200 Subject: [PATCH 17/19] Document that the LevelEntry fields order is important --- milli/src/search/facet/facet_distribution_iter.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 0c37906ef..722a30e6d 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -56,6 +56,9 @@ pub fn count_iterate_over_facet_distribution<'t, CB>( where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { + /// # Important + /// The order of the fields determines the order in which the facet values will be returned. + /// This struct is inserted in a BinaryHeap and popped later on. #[derive(Debug, PartialOrd, Ord, PartialEq, Eq)] struct LevelEntry<'t> { /// The number of candidates in this entry. From 4e85f91aee081fb886eb0bff582226a48579345e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 28 Jun 2023 14:50:01 +0200 Subject: [PATCH 18/19] Add a non default value to the faceting settings of the dump tests --- dump/src/lib.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/dump/src/lib.rs b/dump/src/lib.rs index 5ae9ddfed..1e21eed05 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -208,12 +208,13 @@ pub(crate) mod test { use std::str::FromStr; use big_s::S; - use maplit::btreeset; + use maplit::{btreemap, btreeset}; + use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::keys::{Action, Key}; + use meilisearch_types::milli; use meilisearch_types::milli::update::Setting; - use meilisearch_types::milli::{self}; - use meilisearch_types::settings::{Checked, Settings}; + use meilisearch_types::settings::{Checked, FacetingSettings, Settings}; use meilisearch_types::tasks::{Details, Status}; use serde_json::{json, Map, Value}; use time::macros::datetime; @@ -263,7 +264,12 @@ pub(crate) mod test { synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, typo_tolerance: Setting::NotSet, - faceting: Setting::NotSet, + faceting: Setting::Set(FacetingSettings { + max_values_per_facet: Setting::Set(111), + sort_facet_values_by: Setting::Set( + btreemap! { S("age") => FacetValuesSort::Count }, + ), + }), pagination: Setting::NotSet, _kind: std::marker::PhantomData, }; From 09c5edf242d4e4b47aafe077c48b774818924746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 29 Jun 2023 14:37:18 +0200 Subject: [PATCH 19/19] Cargo fmt --- meilisearch/src/search.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 239fc5c8d..85fef13b3 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -15,7 +15,7 @@ use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::{ - dot_product_similarity, FacetValueHit, OrderBy, InternalError, SearchForFacetValues, + dot_product_similarity, FacetValueHit, InternalError, OrderBy, SearchForFacetValues, }; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document};