From 042d86cbb3c5ea6251f24ec633d00ace32d12803 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 1 Feb 2023 14:40:42 +0100 Subject: [PATCH 1/5] facet sort ascending/descending now also return the values --- .../src/search/facet/facet_sort_ascending.rs | 19 ++++++++++--------- .../src/search/facet/facet_sort_descending.rs | 19 ++++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index 32cf5c355..b0f1dccd5 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -34,7 +34,7 @@ pub fn ascending_facet_sort<'t>( db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, -) -> Result> + 't>> { +) -> Result> + 't>> { let highest_level = get_highest_level(rtxn, db, field_id)?; if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; @@ -60,7 +60,7 @@ struct AscendingFacetSort<'t, 'e> { } impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { - type Item = Result; + type Item = Result<(RoaringBitmap, &'t [u8])>; fn next(&mut self) -> Option { 'outer: loop { @@ -90,7 +90,8 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { *documents_ids -= &bitmap; if level == 0 { - return Some(Ok(bitmap)); + // Since the level is 0, the left_bound is the exact value. + return Some(Ok((bitmap, left_bound))); } let starting_key_below = FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound }; @@ -130,7 +131,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -152,7 +153,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -161,7 +162,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -183,7 +184,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -192,7 +193,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -214,7 +215,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index 4d1fdd1e7..fbcc41b9d 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -17,7 +17,7 @@ pub fn descending_facet_sort<'t>( db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, -) -> Result> + 't>> { +) -> Result> + 't>> { let highest_level = get_highest_level(rtxn, db, field_id)?; if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; @@ -50,7 +50,7 @@ struct DescendingFacetSort<'t> { } impl<'t> Iterator for DescendingFacetSort<'t> { - type Item = Result; + type Item = Result<(RoaringBitmap, &'t [u8])>; fn next(&mut self) -> Option { 'outer: loop { @@ -77,7 +77,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> { *documents_ids -= &bitmap; if level == 0 { - return Some(Ok(bitmap)); + // Since we're at the level 0 the left_bound is the exact value. + return Some(Ok((bitmap, left_bound))); } let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound }; @@ -146,7 +147,7 @@ mod tests { let db = index.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -169,7 +170,7 @@ mod tests { let db = index.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -179,7 +180,7 @@ mod tests { let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -200,7 +201,7 @@ mod tests { let mut results = String::new(); let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -209,7 +210,7 @@ mod tests { let mut results = String::new(); let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } @@ -231,7 +232,7 @@ mod tests { let mut results = String::new(); let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap(); for el in iter { - let docids = el.unwrap(); + let (docids, _) = el.unwrap(); results.push_str(&display_bitmap(&docids)); results.push('\n'); } From 8ae441a4dbcf18d8f5ddc2650e253a59488d3a67 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 1 Feb 2023 14:40:08 +0100 Subject: [PATCH 2/5] Update usage of iterators --- milli/src/search/criteria/asc_desc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index b5afe6778..ae43dd36a 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -219,7 +219,7 @@ fn facet_ordered_set_based<'t>( candidates, )?; - Ok(Box::new(number_iter.chain(string_iter))) + Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids)))) } /// Returns an iterator over groups of the given candidates in ascending or descending order. From 74859ecd610f82543daff62a7af504460f647f4f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 25 Jan 2023 15:04:08 +0100 Subject: [PATCH 3/5] Add min and max facet stats --- meilisearch/src/search.rs | 23 ++++++-- milli/src/search/criteria/asc_desc.rs | 35 +++++++++++- milli/src/search/criteria/mod.rs | 1 + milli/src/search/facet/facet_distribution.rs | 59 ++++++++++++++++++++ 4 files changed, 112 insertions(+), 6 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index f48563141..6bf23cfc4 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -108,7 +108,7 @@ pub struct SearchHit { pub matches_position: Option, } -#[derive(Serialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Debug, Clone, PartialEq)] #[serde(rename_all = "camelCase")] pub struct SearchResult { pub hits: Vec, @@ -118,6 +118,8 @@ pub struct SearchResult { pub hits_info: HitsInfo, #[serde(skip_serializing_if = "Option::is_none")] pub facet_distribution: Option>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_stats: Option>, } #[derive(Serialize, Debug, Clone, PartialEq, Eq)] @@ -129,6 +131,12 @@ pub enum HitsInfo { OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, } +#[derive(Serialize, Debug, Clone, PartialEq)] +pub struct FacetStats { + pub min: f64, + pub max: f64, +} + pub fn perform_search( index: &Index, query: SearchQuery, @@ -300,7 +308,7 @@ pub fn perform_search( HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits } }; - let facet_distribution = match query.facets { + let (facet_distribution, facet_stats) = match query.facets { Some(ref fields) => { let mut facet_distribution = index.facets_distribution(&rtxn); @@ -314,18 +322,23 @@ pub fn perform_search( facet_distribution.facets(fields); } let distribution = facet_distribution.candidates(candidates).execute()?; - - Some(distribution) + let stats = facet_distribution.compute_stats()?; + (Some(distribution), Some(stats)) } - None => None, + None => (None, None), }; + let facet_stats = facet_stats.map(|stats| { + stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect() + }); + let result = SearchResult { hits: documents, hits_info, query: query.q.clone().unwrap_or_default(), processing_time_ms: before_search.elapsed().as_millis(), facet_distribution, + facet_stats, }; Ok(result) } diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index ae43dd36a..a492096d7 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -1,5 +1,6 @@ use std::mem::take; +use heed::BytesDecode; use itertools::Itertools; use log::debug; use ordered_float::OrderedFloat; @@ -7,7 +8,7 @@ use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; use crate::facet::FacetType; -use crate::heed_codec::facet::FacetGroupKeyCodec; +use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; use crate::heed_codec::ByteSliceRefCodec; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; @@ -196,6 +197,38 @@ fn facet_ordered_iterative<'t>( Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box>) } +fn facet_extreme_value<'t>( + mut extreme_it: Box> + 't>, +) -> Result> { + let extreme_value = + if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) }; + let (_, extreme_value) = extreme_value?; + + Ok(OrderedF64Codec::bytes_decode(extreme_value)) +} + +pub fn facet_min_value<'t>( + index: &'t Index, + rtxn: &'t heed::RoTxn, + field_id: FieldId, + candidates: RoaringBitmap, +) -> Result> { + let db = index.facet_id_f64_docids.remap_key_type::>(); + let it = ascending_facet_sort(rtxn, db, field_id, candidates)?; + facet_extreme_value(it) +} + +pub fn facet_max_value<'t>( + index: &'t Index, + rtxn: &'t heed::RoTxn, + field_id: FieldId, + candidates: RoaringBitmap, +) -> Result> { + let db = index.facet_id_f64_docids.remap_key_type::>(); + let it = descending_facet_sort(rtxn, db, field_id, candidates)?; + facet_extreme_value(it) +} + fn facet_ordered_set_based<'t>( index: &'t Index, rtxn: &'t heed::RoTxn, diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 06fba1a1b..0c1c8add1 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -21,6 +21,7 @@ use crate::update::{MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB, MAX_PROXIMITY_FOR_PREFIX use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result}; mod asc_desc; +pub use asc_desc::{facet_max_value, facet_min_value}; mod attribute; mod exactness; pub mod r#final; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 4d5028ce0..72cdb3d33 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -278,6 +278,65 @@ impl<'a> FacetDistribution<'a> { } } + pub fn compute_stats(&self) -> Result> { + let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; + let filterable_fields = self.index.filterable_fields(self.rtxn)?; + let candidates = if let Some(candidates) = self.candidates.clone() { + candidates + } else { + return Ok(Default::default()); + }; + + let fields = match &self.facets { + Some(facets) => { + let invalid_fields: HashSet<_> = facets + .iter() + .filter(|facet| !crate::is_faceted(facet, &filterable_fields)) + .collect(); + if !invalid_fields.is_empty() { + return Err(UserError::InvalidFacetsDistribution { + invalid_facets_name: invalid_fields.into_iter().cloned().collect(), + valid_facets_name: filterable_fields.into_iter().collect(), + } + .into()); + } else { + facets.clone() + } + } + None => filterable_fields, + }; + + let mut distribution = BTreeMap::new(); + for (fid, name) in fields_ids_map.iter() { + if crate::is_faceted(name, &fields) { + let min_value = if let Some(min_value) = crate::search::criteria::facet_min_value( + self.index, + self.rtxn, + fid, + candidates.clone(), + )? { + min_value + } else { + continue; + }; + let max_value = if let Some(max_value) = crate::search::criteria::facet_max_value( + self.index, + self.rtxn, + fid, + candidates.clone(), + )? { + max_value + } else { + continue; + }; + + distribution.insert(name.to_string(), (min_value, max_value)); + } + } + + Ok(distribution) + } + pub fn execute(&self) -> Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?; From 9ac981d0255838ec9b2eefc2fb3453be3843ae77 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 1 Feb 2023 14:19:38 +0100 Subject: [PATCH 4/5] Remove some clippy type complexity warns by deboxing iters --- milli/src/search/criteria/asc_desc.rs | 29 ++++++++++--------- .../src/search/facet/facet_sort_ascending.rs | 11 +++++-- .../src/search/facet/facet_sort_descending.rs | 6 ++-- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index a492096d7..378e1c8da 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -198,7 +198,7 @@ fn facet_ordered_iterative<'t>( } fn facet_extreme_value<'t>( - mut extreme_it: Box> + 't>, + mut extreme_it: impl Iterator> + 't, ) -> Result> { let extreme_value = if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) }; @@ -236,21 +236,22 @@ fn facet_ordered_set_based<'t>( is_ascending: bool, candidates: RoaringBitmap, ) -> Result> + 't>> { - let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort }; + let number_db = + index.facet_id_f64_docids.remap_key_type::>(); + let string_db = + index.facet_id_string_docids.remap_key_type::>(); - let number_iter = make_iter( - rtxn, - index.facet_id_f64_docids.remap_key_type::>(), - field_id, - candidates.clone(), - )?; + let (number_iter, string_iter) = if is_ascending { + let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?; + let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?; - let string_iter = make_iter( - rtxn, - index.facet_id_string_docids.remap_key_type::>(), - field_id, - candidates, - )?; + (itertools::Either::Left(number_iter), itertools::Either::Left(string_iter)) + } else { + let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?; + let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?; + + (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter)) + }; Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids)))) } diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index b0f1dccd5..f59b884de 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -34,15 +34,20 @@ pub fn ascending_facet_sort<'t>( db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, -) -> Result> + 't>> { +) -> Result> + 't> { let highest_level = get_highest_level(rtxn, db, field_id)?; if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); - Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] })) + Ok(itertools::Either::Left(AscendingFacetSort { + rtxn, + db, + field_id, + stack: vec![(candidates, iter)], + })) } else { - Ok(Box::new(std::iter::empty())) + Ok(itertools::Either::Right(std::iter::empty())) } } diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index fbcc41b9d..454b12859 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -17,21 +17,21 @@ pub fn descending_facet_sort<'t>( db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, -) -> Result> + 't>> { +) -> Result> + 't> { let highest_level = get_highest_level(rtxn, db, field_id)?; if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); - Ok(Box::new(DescendingFacetSort { + Ok(itertools::Either::Left(DescendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter, Bound::Included(last_bound))], })) } else { - Ok(Box::new(std::iter::empty())) + Ok(itertools::Either::Right(std::iter::empty())) } } From eb28d4c525eeb718693f691a140187f0c21d6b53 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 30 Jan 2023 16:02:50 +0100 Subject: [PATCH 5/5] add facet test --- milli/src/search/facet/facet_distribution.rs | 212 +++++++++++++++++++ 1 file changed, 212 insertions(+) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 72cdb3d33..2aae78bb2 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -596,4 +596,216 @@ mod tests { milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992"); } + + #[test] + fn facet_stats() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = (0..1000).into_iter().collect::>(); + + let mut documents = vec![]; + for i in 0..1000 { + let document = serde_json::json!({ + "colour": facet_values[i % 1000], + }) + .as_object() + .unwrap() + .clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..1000).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((217..777).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###); + } + + #[test] + fn facet_stats_array() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = (0..1000).into_iter().collect::>(); + + let mut documents = vec![]; + for i in 0..1000 { + let document = serde_json::json!({ + "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000], + }) + .as_object() + .unwrap() + .clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..1000).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((217..777).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 1776.0)}"###); + } + + #[test] + fn facet_stats_mixed_array() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = (0..1000).into_iter().collect::>(); + + let mut documents = vec![]; + for i in 0..1000 { + let document = serde_json::json!({ + "colour": [facet_values[i % 1000], format!("{}", facet_values[i % 1000] + 1000)], + }) + .as_object() + .unwrap() + .clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..1000).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((217..777).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###); + } + + #[test] + fn facet_mixed_values() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = (0..1000).into_iter().collect::>(); + + let mut documents = vec![]; + for i in 0..1000 { + let document = if i % 2 == 0 { + serde_json::json!({ + "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000], + }) + } else { + serde_json::json!({ + "colour": format!("{}", facet_values[i % 1000] + 10000), + }) + }; + let document = document.as_object().unwrap().clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates", @"{}"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..1000).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((217..777).into_iter().collect()) + .compute_stats() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (218.0, 1776.0)}"###); + } }