diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index aed29e612..ebd808b42 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -597,6 +597,9 @@ pub struct SearchAggregator { // every time a request has a filter, this field must be incremented by one sort_total_number_of_criteria: usize, + // distinct + distinct: bool, + // filter filter_with_geo_radius: bool, filter_with_geo_bounding_box: bool, @@ -670,6 +673,7 @@ impl SearchAggregator { show_ranking_score_details, filter, sort, + distinct, facets: _, highlight_pre_tag, highlight_post_tag, @@ -692,6 +696,8 @@ impl SearchAggregator { ret.sort_sum_of_criteria_terms = sort.len(); } + ret.distinct = distinct.is_some(); + if let Some(ref filter) = filter { static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); ret.filter_total_number_of_criteria = 1; @@ -795,6 +801,7 @@ impl SearchAggregator { sort_with_geo_point, sort_sum_of_criteria_terms, sort_total_number_of_criteria, + distinct, filter_with_geo_radius, filter_with_geo_bounding_box, filter_sum_of_criteria_terms, @@ -851,6 +858,9 @@ impl SearchAggregator { self.sort_total_number_of_criteria = self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); + // distinct + self.distinct |= distinct; + // filter self.filter_with_geo_radius |= filter_with_geo_radius; self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; @@ -921,6 +931,7 @@ impl SearchAggregator { sort_with_geo_point, sort_sum_of_criteria_terms, sort_total_number_of_criteria, + distinct, filter_with_geo_radius, filter_with_geo_bounding_box, filter_sum_of_criteria_terms, @@ -977,6 +988,8 @@ impl SearchAggregator { "with_geoPoint": sort_with_geo_point, "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), }, + // TODO ask help from MarĂ­a + "distinct": distinct, "filter": { "with_geoRadius": filter_with_geo_radius, "with_geoBoundingBox": filter_with_geo_bounding_box, @@ -1087,6 +1100,7 @@ impl MultiSearchAggregator { show_matches_position: _, filter: _, sort: _, + distinct: _, facets: _, highlight_pre_tag: _, highlight_post_tag: _, diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 10b371f2d..4b3f73115 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -123,6 +123,7 @@ impl From for SearchQuery { show_ranking_score_details: false, filter, sort: None, + distinct: None, facets: None, highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 348d8295c..6ea6802d9 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -61,6 +61,9 @@ pub struct SearchQueryGet { filter: Option, #[deserr(default, error = DeserrQueryParamError)] sort: Option, + // TODO change the InvalidSearchSort to InvalidSearchDistinct error + #[deserr(default, error = DeserrQueryParamError)] + distinct: Option, #[deserr(default, error = DeserrQueryParamError)] show_matches_position: Param, #[deserr(default, error = DeserrQueryParamError)] @@ -158,6 +161,7 @@ impl From for SearchQuery { attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), filter, sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), + distinct: other.distinct, show_matches_position: other.show_matches_position.0, show_ranking_score: other.show_ranking_score.0, show_ranking_score_details: other.show_ranking_score_details.0, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 05b3c1aff..edc3feb5d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -75,6 +75,9 @@ pub struct SearchQuery { pub filter: Option, #[deserr(default, error = DeserrJsonError)] pub sort: Option>, + // TODO Change the error to InvalidSearchDistinct + #[deserr(default, error = DeserrJsonError)] + pub distinct: Option, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] @@ -149,6 +152,7 @@ impl fmt::Debug for SearchQuery { show_ranking_score_details, filter, sort, + distinct, facets, highlight_pre_tag, highlight_post_tag, @@ -195,6 +199,9 @@ impl fmt::Debug for SearchQuery { if let Some(sort) = sort { debug.field("sort", &sort); } + if let Some(distinct) = distinct { + debug.field("distinct", &distinct); + } if let Some(facets) = facets { debug.field("facets", &facets); } @@ -386,6 +393,9 @@ pub struct SearchQueryWithIndex { pub filter: Option, #[deserr(default, error = DeserrJsonError)] pub sort: Option>, + // TODO change error to InvalidSearchDistinct + #[deserr(default, error = DeserrJsonError)] + pub distinct: Option, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] @@ -421,6 +431,7 @@ impl SearchQueryWithIndex { show_matches_position, filter, sort, + distinct, facets, highlight_pre_tag, highlight_post_tag, @@ -448,6 +459,7 @@ impl SearchQueryWithIndex { show_matches_position, filter, sort, + distinct, facets, highlight_pre_tag, highlight_post_tag, @@ -716,6 +728,10 @@ fn prepare_search<'t>( search.ranking_score_threshold(ranking_score_threshold.0); } + if let Some(distinct) = &query.distinct { + search.distinct(distinct.clone()); + } + match search_kind { SearchKind::KeywordOnly => { if let Some(q) = &query.q { @@ -866,6 +882,7 @@ pub fn perform_search( matching_strategy: _, attributes_to_search_on: _, filter: _, + distinct: _, } = query; let format = AttributesFormat { diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 0195c396f..87020994a 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -59,6 +59,7 @@ fn main() -> Result<(), Box> { false, universe, &None, + &None, GeoSortStrategy::default(), 0, 20, diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index 87f922c4c..1c784097d 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -159,6 +159,7 @@ impl<'a> Search<'a> { offset: 0, limit: self.limit + self.offset, sort_criteria: self.sort_criteria.clone(), + distinct: self.distinct.clone(), searchable_attributes: self.searchable_attributes, geo_strategy: self.geo_strategy, terms_matching_strategy: self.terms_matching_strategy, diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 49d73ff31..d937875da 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -40,6 +40,7 @@ pub struct Search<'a> { offset: usize, limit: usize, sort_criteria: Option>, + distinct: Option, searchable_attributes: Option<&'a [String]>, geo_strategy: new::GeoSortStrategy, terms_matching_strategy: TermsMatchingStrategy, @@ -61,6 +62,7 @@ impl<'a> Search<'a> { offset: 0, limit: 20, sort_criteria: None, + distinct: None, searchable_attributes: None, geo_strategy: new::GeoSortStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(), @@ -105,6 +107,11 @@ impl<'a> Search<'a> { self } + pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> { + self.distinct = Some(distinct); + self + } + pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> { self.searchable_attributes = Some(searchable); self @@ -169,6 +176,13 @@ impl<'a> Search<'a> { ctx.attributes_to_search_on(searchable_attributes)?; } + if let Some(distinct) = &self.distinct { + if !ctx.index.filterable_fields(ctx.txn)?.contains(distinct) { + // TODO return a real error message + panic!("Distinct search field is not a filterable attribute"); + } + } + let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?; let PartialSearchResult { located_query_terms, @@ -185,6 +199,7 @@ impl<'a> Search<'a> { self.scoring_strategy, universe, &self.sort_criteria, + &self.distinct, self.geo_strategy, self.offset, self.limit, @@ -202,6 +217,7 @@ impl<'a> Search<'a> { self.exhaustive_number_hits, universe, &self.sort_criteria, + &self.distinct, self.geo_strategy, self.offset, self.limit, @@ -238,6 +254,7 @@ impl fmt::Debug for Search<'_> { offset, limit, sort_criteria, + distinct, searchable_attributes, geo_strategy: _, terms_matching_strategy, @@ -257,6 +274,7 @@ impl fmt::Debug for Search<'_> { .field("offset", offset) .field("limit", limit) .field("sort_criteria", sort_criteria) + .field("distinct", distinct) .field("searchable_attributes", searchable_attributes) .field("terms_matching_strategy", terms_matching_strategy) .field("scoring_strategy", scoring_strategy) diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index d937c78bf..9255e4c09 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -22,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( ctx: &mut SearchContext<'ctx>, mut ranking_rules: Vec>, query: &Q, + distinct: Option<&str>, universe: &RoaringBitmap, from: usize, length: usize, @@ -34,7 +35,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( logger.ranking_rules(&ranking_rules); logger.initial_universe(universe); - let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? { + let distinct_field = match distinct { + Some(distinct) => Some(distinct), + None => ctx.index.distinct_field(ctx.txn)?, + }; + + let distinct_fid = if let Some(field) = distinct_field { ctx.index.fields_ids_map(ctx.txn)?.id(field) } else { None diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 87ddb2915..77ae5fcd5 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -516,6 +516,7 @@ mod tests { false, universe, &None, + &None, crate::search::new::GeoSortStrategy::default(), 0, 100, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 623c72567..257f81539 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -567,6 +567,7 @@ pub fn execute_vector_search( scoring_strategy: ScoringStrategy, universe: RoaringBitmap, sort_criteria: &Option>, + distinct: &Option, geo_strategy: geo_sort::Strategy, from: usize, length: usize, @@ -597,6 +598,7 @@ pub fn execute_vector_search( ctx, ranking_rules, &PlaceholderQuery, + distinct.as_deref(), &universe, from, length, @@ -626,6 +628,7 @@ pub fn execute_search( exhaustive_number_hits: bool, mut universe: RoaringBitmap, sort_criteria: &Option>, + distinct: &Option, geo_strategy: geo_sort::Strategy, from: usize, length: usize, @@ -716,6 +719,7 @@ pub fn execute_search( ctx, ranking_rules, &graph, + distinct.as_deref(), &universe, from, length, @@ -731,6 +735,7 @@ pub fn execute_search( ctx, ranking_rules, &PlaceholderQuery, + distinct.as_deref(), &universe, from, length, @@ -747,7 +752,14 @@ pub fn execute_search( // The candidates is the universe unless the exhaustive number of hits // is requested and a distinct attribute is set. if exhaustive_number_hits { - if let Some(f) = ctx.index.distinct_field(ctx.txn)? { + // TODO Should the distinct search parameter replace the distinct setting? + // Or should we return an error if the distinct search param is set at the same time as the setting is set? + let distinct_field = match distinct.as_deref() { + Some(distinct) => Some(distinct), + None => ctx.index.distinct_field(ctx.txn)?, + }; + + if let Some(f) = distinct_field { if let Some(distinct_fid) = fields_ids_map.id(f) { all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; }