From 74859ecd610f82543daff62a7af504460f647f4f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 25 Jan 2023 15:04:08 +0100 Subject: [PATCH] Add min and max facet stats --- meilisearch/src/search.rs | 23 ++++++-- milli/src/search/criteria/asc_desc.rs | 35 +++++++++++- milli/src/search/criteria/mod.rs | 1 + milli/src/search/facet/facet_distribution.rs | 59 ++++++++++++++++++++ 4 files changed, 112 insertions(+), 6 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index f48563141..6bf23cfc4 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -108,7 +108,7 @@ pub struct SearchHit { pub matches_position: Option, } -#[derive(Serialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Debug, Clone, PartialEq)] #[serde(rename_all = "camelCase")] pub struct SearchResult { pub hits: Vec, @@ -118,6 +118,8 @@ pub struct SearchResult { pub hits_info: HitsInfo, #[serde(skip_serializing_if = "Option::is_none")] pub facet_distribution: Option>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_stats: Option>, } #[derive(Serialize, Debug, Clone, PartialEq, Eq)] @@ -129,6 +131,12 @@ pub enum HitsInfo { OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, } +#[derive(Serialize, Debug, Clone, PartialEq)] +pub struct FacetStats { + pub min: f64, + pub max: f64, +} + pub fn perform_search( index: &Index, query: SearchQuery, @@ -300,7 +308,7 @@ pub fn perform_search( HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits } }; - let facet_distribution = match query.facets { + let (facet_distribution, facet_stats) = match query.facets { Some(ref fields) => { let mut facet_distribution = index.facets_distribution(&rtxn); @@ -314,18 +322,23 @@ pub fn perform_search( facet_distribution.facets(fields); } let distribution = facet_distribution.candidates(candidates).execute()?; - - Some(distribution) + let stats = facet_distribution.compute_stats()?; + (Some(distribution), Some(stats)) } - None => None, + None => (None, None), }; + let facet_stats = facet_stats.map(|stats| { + stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect() + }); + let result = SearchResult { hits: documents, hits_info, query: query.q.clone().unwrap_or_default(), processing_time_ms: before_search.elapsed().as_millis(), facet_distribution, + facet_stats, }; Ok(result) } diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index ae43dd36a..a492096d7 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -1,5 +1,6 @@ use std::mem::take; +use heed::BytesDecode; use itertools::Itertools; use log::debug; use ordered_float::OrderedFloat; @@ -7,7 +8,7 @@ use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; use crate::facet::FacetType; -use crate::heed_codec::facet::FacetGroupKeyCodec; +use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; use crate::heed_codec::ByteSliceRefCodec; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; @@ -196,6 +197,38 @@ fn facet_ordered_iterative<'t>( Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box>) } +fn facet_extreme_value<'t>( + mut extreme_it: Box> + 't>, +) -> Result> { + let extreme_value = + if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) }; + let (_, extreme_value) = extreme_value?; + + Ok(OrderedF64Codec::bytes_decode(extreme_value)) +} + +pub fn facet_min_value<'t>( + index: &'t Index, + rtxn: &'t heed::RoTxn, + field_id: FieldId, + candidates: RoaringBitmap, +) -> Result> { + let db = index.facet_id_f64_docids.remap_key_type::>(); + let it = ascending_facet_sort(rtxn, db, field_id, candidates)?; + facet_extreme_value(it) +} + +pub fn facet_max_value<'t>( + index: &'t Index, + rtxn: &'t heed::RoTxn, + field_id: FieldId, + candidates: RoaringBitmap, +) -> Result> { + let db = index.facet_id_f64_docids.remap_key_type::>(); + let it = descending_facet_sort(rtxn, db, field_id, candidates)?; + facet_extreme_value(it) +} + fn facet_ordered_set_based<'t>( index: &'t Index, rtxn: &'t heed::RoTxn, diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 06fba1a1b..0c1c8add1 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -21,6 +21,7 @@ use crate::update::{MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB, MAX_PROXIMITY_FOR_PREFIX use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result}; mod asc_desc; +pub use asc_desc::{facet_max_value, facet_min_value}; mod attribute; mod exactness; pub mod r#final; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 4d5028ce0..72cdb3d33 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -278,6 +278,65 @@ impl<'a> FacetDistribution<'a> { } } + pub fn compute_stats(&self) -> Result> { + let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; + let filterable_fields = self.index.filterable_fields(self.rtxn)?; + let candidates = if let Some(candidates) = self.candidates.clone() { + candidates + } else { + return Ok(Default::default()); + }; + + let fields = match &self.facets { + Some(facets) => { + let invalid_fields: HashSet<_> = facets + .iter() + .filter(|facet| !crate::is_faceted(facet, &filterable_fields)) + .collect(); + if !invalid_fields.is_empty() { + return Err(UserError::InvalidFacetsDistribution { + invalid_facets_name: invalid_fields.into_iter().cloned().collect(), + valid_facets_name: filterable_fields.into_iter().collect(), + } + .into()); + } else { + facets.clone() + } + } + None => filterable_fields, + }; + + let mut distribution = BTreeMap::new(); + for (fid, name) in fields_ids_map.iter() { + if crate::is_faceted(name, &fields) { + let min_value = if let Some(min_value) = crate::search::criteria::facet_min_value( + self.index, + self.rtxn, + fid, + candidates.clone(), + )? { + min_value + } else { + continue; + }; + let max_value = if let Some(max_value) = crate::search::criteria::facet_max_value( + self.index, + self.rtxn, + fid, + candidates.clone(), + )? { + max_value + } else { + continue; + }; + + distribution.insert(name.to_string(), (min_value, max_value)); + } + } + + Ok(distribution) + } + pub fn execute(&self) -> Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?;