From 34b2e98fe907a0a0036f911b17db42874e3225a8 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 29 May 2023 15:32:09 +0200 Subject: [PATCH] Expose a sortFacetValuesBy parameter to the user --- meilisearch/src/routes/indexes/search.rs | 9 ++++-- meilisearch/src/search.rs | 33 ++++++++++++++++++-- milli/src/lib.rs | 2 +- milli/src/search/facet/facet_distribution.rs | 10 +++--- milli/src/search/facet/mod.rs | 2 +- milli/src/search/mod.rs | 2 +- 6 files changed, 45 insertions(+), 13 deletions(-) diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index a79f95ee4..2ee8c44f7 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -16,9 +16,9 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::search::{ - add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, - DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, - DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, + add_search_rules, perform_search, FacetValuesSort, MatchingStrategy, SearchQuery, + DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, + DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, }; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -64,6 +64,8 @@ pub struct SearchQueryGet { show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError)] facets: Option>, + #[deserr(default, error = DeserrQueryParamError)] + sort_facet_values_by: Option, #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] highlight_pre_tag: String, #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError)] @@ -103,6 +105,7 @@ impl From for SearchQuery { show_ranking_score: other.show_ranking_score.0, show_ranking_score_details: 
other.show_ranking_score_details.0, facets: other.facets.map(|o| o.into_iter().collect()), + sort_facet_values_by: other.sort_facet_values_by, highlight_pre_tag: other.highlight_pre_tag, highlight_post_tag: other.highlight_post_tag, crop_marker: other.crop_marker, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index bebf80084..ed2378d94 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -14,7 +14,7 @@ use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::{ - dot_product_similarity, FacetValueHit, InternalError, SearchForFacetValues, + dot_product_similarity, FacetValueHit, OrderBy, InternalError, SearchForFacetValues, }; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; @@ -74,6 +74,8 @@ pub struct SearchQuery { pub sort: Option>, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, + #[deserr(default, error = DeserrJsonError)] // TODO + pub sort_facet_values_by: Option, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] pub highlight_pre_tag: String, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] @@ -133,6 +135,8 @@ pub struct SearchQueryWithIndex { pub sort: Option>, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, + #[deserr(default, error = DeserrJsonError)] // TODO + pub sort_facet_values_by: Option, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] pub highlight_pre_tag: String, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] @@ -165,6 +169,7 @@ impl SearchQueryWithIndex { filter, sort, facets, + sort_facet_values_by, highlight_pre_tag, highlight_post_tag, crop_marker, @@ -190,6 +195,7 @@ impl SearchQueryWithIndex { filter, sort, facets, + sort_facet_values_by, 
highlight_pre_tag, highlight_post_tag, crop_marker, @@ -226,6 +232,26 @@ impl From for TermsMatchingStrategy { } } +#[derive(Debug, Default, Clone, PartialEq, Eq, Deserr)] +#[deserr(rename_all = camelCase)] +pub enum FacetValuesSort { + /// Facet values are sorted in alphabetical order, ascending from A to Z. + #[default] + Alpha, + /// Facet values are sorted by decreasing count. + /// The count is the number of records containing this facet value in the results of the query. + Count, +} + +impl Into for FacetValuesSort { + fn into(self) -> OrderBy { + match self { + FacetValuesSort::Alpha => OrderBy::Lexicographic, + FacetValuesSort::Count => OrderBy::Count, + } + } +} + #[derive(Debug, Clone, Serialize, PartialEq)] pub struct SearchHit { #[serde(flatten)] @@ -557,7 +583,10 @@ pub fn perform_search( if fields.iter().all(|f| f != "*") { facet_distribution.facets(fields); } - let distribution = facet_distribution.candidates(candidates).execute()?; + let distribution = facet_distribution + .candidates(candidates) + .order_by(query.sort_facet_values_by.map_or_else(Default::default, Into::into)) + .execute()?; let stats = facet_distribution.compute_stats()?; (Some(distribution), Some(stats)) } diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 4360eb38e..55b283931 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -58,7 +58,7 @@ pub use self::heed_codec::{ pub use self::index::Index; pub use self::search::{ FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder, - MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy, + MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, }; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 3d277013a..3cc970049 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -12,11 +12,10 @@ use 
crate::heed_codec::facet::{ FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, }; use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; -use crate::search::facet::facet_distribution_iter; -use crate::{FieldId, Index, Result}; -use facet_distribution_iter::{ +use crate::search::facet::facet_distribution_iter::{ count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, }; +use crate::{FieldId, Index, Result}; /// The default number of values by facets that will /// be fetched from the key-value store. @@ -27,9 +26,10 @@ pub const DEFAULT_VALUES_PER_FACET: usize = 100; const CANDIDATES_THRESHOLD: u64 = 3000; /// How should we fetch the facets? -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum OrderBy { /// By lexicographic order... + #[default] Lexicographic, /// Or by number of docids in common? Count, @@ -50,7 +50,7 @@ impl<'a> FacetDistribution<'a> { facets: None, candidates: None, max_values_per_facet: DEFAULT_VALUES_PER_FACET, - order_by: OrderBy::Count, + order_by: OrderBy::default(), rtxn, index, } diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 51f1bf005..ebc9e1da0 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -4,7 +4,7 @@ use heed::types::{ByteSlice, DecodeIgnore}; use heed::{BytesDecode, RoTxn}; use roaring::RoaringBitmap; -pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; +pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::filter::{BadGeoError, Filter}; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; use crate::heed_codec::ByteSliceRefCodec; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index e05562f8e..65e78caa9 100644 --- a/milli/src/search/mod.rs +++ 
b/milli/src/search/mod.rs @@ -7,7 +7,7 @@ use log::error; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET}; +pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords}; use self::new::PartialSearchResult; use crate::error::UserError;