diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index f48563141..6bf23cfc4 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -108,7 +108,7 @@ pub struct SearchHit { pub matches_position: Option, } -#[derive(Serialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Debug, Clone, PartialEq)] #[serde(rename_all = "camelCase")] pub struct SearchResult { pub hits: Vec, @@ -118,6 +118,8 @@ pub struct SearchResult { pub hits_info: HitsInfo, #[serde(skip_serializing_if = "Option::is_none")] pub facet_distribution: Option>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_stats: Option>, } #[derive(Serialize, Debug, Clone, PartialEq, Eq)] @@ -129,6 +131,12 @@ pub enum HitsInfo { OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, } +#[derive(Serialize, Debug, Clone, PartialEq)] +pub struct FacetStats { + pub min: f64, + pub max: f64, +} + pub fn perform_search( index: &Index, query: SearchQuery, @@ -300,7 +308,7 @@ pub fn perform_search( HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits } }; - let facet_distribution = match query.facets { + let (facet_distribution, facet_stats) = match query.facets { Some(ref fields) => { let mut facet_distribution = index.facets_distribution(&rtxn); @@ -314,18 +322,23 @@ pub fn perform_search( facet_distribution.facets(fields); } let distribution = facet_distribution.candidates(candidates).execute()?; - - Some(distribution) + let stats = facet_distribution.compute_stats()?; + (Some(distribution), Some(stats)) } - None => None, + None => (None, None), }; + let facet_stats = facet_stats.map(|stats| { + stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect() + }); + let result = SearchResult { hits: documents, hits_info, query: query.q.clone().unwrap_or_default(), processing_time_ms: before_search.elapsed().as_millis(), facet_distribution, + facet_stats, }; Ok(result) } diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index b5afe6778..378e1c8da 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -1,5 +1,6 @@ use std::mem::take; +use heed::BytesDecode; use itertools::Itertools; use log::debug; use ordered_float::OrderedFloat; @@ -7,7 +8,7 @@ use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; use crate::facet::FacetType; -use crate::heed_codec::facet::FacetGroupKeyCodec; +use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; use crate::heed_codec::ByteSliceRefCodec; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; @@ -196,6 +197,38 @@ fn facet_ordered_iterative<'t>( Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box>) } +fn facet_extreme_value<'t>( + mut extreme_it: impl Iterator> + 't, +) -> Result> { + let extreme_value = + if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) }; + let (_, extreme_value) = extreme_value?; + + Ok(OrderedF64Codec::bytes_decode(extreme_value)) +} + +pub fn facet_min_value<'t>( + index: &'t Index, + rtxn: &'t heed::RoTxn, + field_id: FieldId, + candidates: RoaringBitmap, +) -> Result> { + let db = index.facet_id_f64_docids.remap_key_type::>(); + let it = ascending_facet_sort(rtxn, db, field_id, candidates)?; + facet_extreme_value(it) +} + +pub fn facet_max_value<'t>( + index: &'t Index, + rtxn: &'t heed::RoTxn, + field_id: FieldId, + candidates: RoaringBitmap, +) -> Result> { + let db = index.facet_id_f64_docids.remap_key_type::>(); + let it = descending_facet_sort(rtxn, db, field_id, candidates)?; + facet_extreme_value(it) +} + fn facet_ordered_set_based<'t>( index: &'t Index, rtxn: &'t heed::RoTxn, @@ -203,23 +236,24 @@ fn facet_ordered_set_based<'t>( is_ascending: bool, candidates: RoaringBitmap, ) -> Result> + 't>> { - let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort }; + let number_db = + index.facet_id_f64_docids.remap_key_type::>(); + let string_db = + index.facet_id_string_docids.remap_key_type::>(); - let number_iter = make_iter( - rtxn, - index.facet_id_f64_docids.remap_key_type::>(), - field_id, - candidates.clone(), - )?; + let (number_iter, string_iter) = if is_ascending { + let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?; + let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?; - let string_iter = make_iter( - rtxn, - index.facet_id_string_docids.remap_key_type::>(), - field_id, - candidates, - )?; + (itertools::Either::Left(number_iter), itertools::Either::Left(string_iter)) + } else { + let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?; + let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?; - Ok(Box::new(number_iter.chain(string_iter))) + (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter)) + }; + + Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids)))) } /// Returns an iterator over groups of the given candidates in ascending or descending order. diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 06fba1a1b..0c1c8add1 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -21,6 +21,7 @@ use crate::update::{MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB, MAX_PROXIMITY_FOR_PREFIX use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result}; mod asc_desc; +pub use asc_desc::{facet_max_value, facet_min_value}; mod attribute; mod exactness; pub mod r#final; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 4d5028ce0..2aae78bb2 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -278,6 +278,65 @@ impl<'a> FacetDistribution<'a> { } } + pub fn compute_stats(&self) -> Result> { + let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; + let filterable_fields = self.index.filterable_fields(self.rtxn)?; + let candidates = if let Some(candidates) = self.candidates.clone() { + candidates + } else { + return Ok(Default::default()); + }; + + let fields = match &self.facets { + Some(facets) => { + let invalid_fields: HashSet<_> = facets + .iter() + .filter(|facet| !crate::is_faceted(facet, &filterable_fields)) + .collect(); + if !invalid_fields.is_empty() { + return Err(UserError::InvalidFacetsDistribution { + invalid_facets_name: invalid_fields.into_iter().cloned().collect(), + valid_facets_name: filterable_fields.into_iter().collect(), + } + .into()); + } else { + facets.clone() + } + } + None => filterable_fields, + }; + + let mut distribution = BTreeMap::new(); + for (fid, name) in fields_ids_map.iter() { + if crate::is_faceted(name, &fields) { + let min_value = if let Some(min_value) = crate::search::criteria::facet_min_value( + self.index, + self.rtxn, + fid, + candidates.clone(), + )? { + min_value + } else { + continue; + }; + let max_value = if let Some(max_value) = crate::search::criteria::facet_max_value( + self.index, + self.rtxn, + fid, + candidates.clone(), + )? { + max_value + } else { + continue; + }; + + distribution.insert(name.to_string(), (min_value, max_value)); + } + } + + Ok(distribution) + } + pub fn execute(&self) -> Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?; @@ -537,4 +596,216 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992"); } + + #[test] + fn facet_stats() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = (0..1000).into_iter().collect::>(); + + let mut documents = vec![]; + for i in 0..1000 { + let document = serde_json::json!({ + "colour": facet_values[i % 1000], + }) + .as_object() + .unwrap() + .clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..1000).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((217..777).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###); + } + + #[test] + fn facet_stats_array() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = (0..1000).into_iter().collect::>(); + + let mut documents = vec![]; + for i in 0..1000 { + let document = serde_json::json!({ + "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000], + }) + .as_object() + .unwrap() + .clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..1000).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((217..777).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 1776.0)}"###); + } + + #[test] + fn facet_stats_mixed_array() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = (0..1000).into_iter().collect::>(); + + let mut documents = vec![]; + for i in 0..1000 { + let document = serde_json::json!({ + "colour": [facet_values[i % 1000], format!("{}", facet_values[i % 1000] + 1000)], + }) + .as_object() + .unwrap() + .clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..1000).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((217..777).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###); + } + + #[test] + fn facet_mixed_values() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = (0..1000).into_iter().collect::>(); + + let mut documents = vec![]; + for i in 0..1000 { + let document = if i % 2 == 0 { + serde_json::json!({ + "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000], + }) + } else { + serde_json::json!({ + "colour": format!("{}", facet_values[i % 1000] + 10000), + }) + }; + let document = document.as_object().unwrap().clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..1000).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((217..777).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (218.0, 1776.0)}"###); + } } diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index 32cf5c355..f59b884de 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -34,15 +34,20 @@ pub fn ascending_facet_sort<'t>( db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, -) -> Result> + 't>> { +) -> Result> + 't> { let highest_level = get_highest_level(rtxn, db, field_id)?; if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); - Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] })) + Ok(itertools::Either::Left(AscendingFacetSort { + rtxn, + db, + field_id, + stack: vec![(candidates, iter)], + })) } else { - Ok(Box::new(std::iter::empty())) + Ok(itertools::Either::Right(std::iter::empty())) } } @@ -60,7 +65,7 @@ struct AscendingFacetSort<'t, 'e> { } impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { - type Item = Result; + type Item = Result<(RoaringBitmap, &'t [u8])>; fn next(&mut self) -> Option { 'outer: loop { @@ -90,7 +95,8 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { *documents_ids -= &bitmap; if level == 0 { - return Some(Ok(bitmap)); + // Since the level is 0, the left_bound is the exact value. + return Some(Ok((bitmap, left_bound))); } let starting_key_below = FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound }; @@ -130,7 +136,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -152,7 +158,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -161,7 +167,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -183,7 +189,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -192,7 +198,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -214,7 +220,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index 4d1fdd1e7..454b12859 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -17,21 +17,21 @@ pub fn descending_facet_sort<'t>( db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, -) -> Result> + 't>> { +) -> Result> + 't> { let highest_level = get_highest_level(rtxn, db, field_id)?; if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); - Ok(Box::new(DescendingFacetSort { + Ok(itertools::Either::Left(DescendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter, Bound::Included(last_bound))], })) } else { - Ok(Box::new(std::iter::empty())) + Ok(itertools::Either::Right(std::iter::empty())) } } @@ -50,7 +50,7 @@ struct DescendingFacetSort<'t> { } impl<'t> Iterator for DescendingFacetSort<'t> { - type Item = Result; + type Item = Result<(RoaringBitmap, &'t [u8])>; fn next(&mut self) -> Option { 'outer: loop { @@ -77,7 +77,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> { *documents_ids -= &bitmap; if level == 0 { - return Some(Ok(bitmap)); + // Since we're at the level 0 the left_bound is the exact value. + return Some(Ok((bitmap, left_bound))); } let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound }; @@ -146,7 +147,7 @@ mod tests { let db = index.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -169,7 +170,7 @@ mod tests { let db = index.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -179,7 +180,7 @@ mod tests { let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -200,7 +201,7 @@ mod tests { let mut results = String::new(); let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -209,7 +210,7 @@ mod tests { let mut results = String::new(); let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -231,7 +232,7 @@ mod tests { let mut results = String::new(); let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); }