4693: Introduce distinct attributes at search time r=irevoire a=Kerollmops

This PR fixes #4611.

### To Do
- [x] Remove the `distinguishableAttributes` settings (not even a commit about that).
- [x] Use the `filterableAttributes` to be able to use the `distinct` parameter at search.
- [x] Work on the errors and make tests.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-06-18 07:45:03 +00:00 committed by GitHub
commit e580d6b98f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 315 additions and 14 deletions

View File

@ -273,6 +273,7 @@ InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ; InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
@ -384,6 +385,7 @@ impl ErrorCode for milli::Error {
Code::IndexPrimaryKeyMultipleCandidatesFound Code::IndexPrimaryKeyMultipleCandidatesFound
} }
UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists, UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists,
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
UserError::SortRankingRuleMissing => Code::InvalidSearchSort, UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets, UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,

View File

@ -597,6 +597,9 @@ pub struct SearchAggregator {
// every time a request has a filter, this field must be incremented by one // every time a request has a filter, this field must be incremented by one
sort_total_number_of_criteria: usize, sort_total_number_of_criteria: usize,
// distinct
distinct: bool,
// filter // filter
filter_with_geo_radius: bool, filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool, filter_with_geo_bounding_box: bool,
@ -672,6 +675,7 @@ impl SearchAggregator {
show_ranking_score_details, show_ranking_score_details,
filter, filter,
sort, sort,
distinct,
facets: _, facets: _,
highlight_pre_tag, highlight_pre_tag,
highlight_post_tag, highlight_post_tag,
@ -694,6 +698,8 @@ impl SearchAggregator {
ret.sort_sum_of_criteria_terms = sort.len(); ret.sort_sum_of_criteria_terms = sort.len();
} }
ret.distinct = distinct.is_some();
if let Some(ref filter) = filter { if let Some(ref filter) = filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap()); static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1; ret.filter_total_number_of_criteria = 1;
@ -798,6 +804,7 @@ impl SearchAggregator {
sort_with_geo_point, sort_with_geo_point,
sort_sum_of_criteria_terms, sort_sum_of_criteria_terms,
sort_total_number_of_criteria, sort_total_number_of_criteria,
distinct,
filter_with_geo_radius, filter_with_geo_radius,
filter_with_geo_bounding_box, filter_with_geo_bounding_box,
filter_sum_of_criteria_terms, filter_sum_of_criteria_terms,
@ -855,6 +862,9 @@ impl SearchAggregator {
self.sort_total_number_of_criteria = self.sort_total_number_of_criteria =
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
// distinct
self.distinct |= distinct;
// filter // filter
self.filter_with_geo_radius |= filter_with_geo_radius; self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
@ -926,6 +936,7 @@ impl SearchAggregator {
sort_with_geo_point, sort_with_geo_point,
sort_sum_of_criteria_terms, sort_sum_of_criteria_terms,
sort_total_number_of_criteria, sort_total_number_of_criteria,
distinct,
filter_with_geo_radius, filter_with_geo_radius,
filter_with_geo_bounding_box, filter_with_geo_bounding_box,
filter_sum_of_criteria_terms, filter_sum_of_criteria_terms,
@ -983,6 +994,7 @@ impl SearchAggregator {
"with_geoPoint": sort_with_geo_point, "with_geoPoint": sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
}, },
"distinct": distinct,
"filter": { "filter": {
"with_geoRadius": filter_with_geo_radius, "with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box, "with_geoBoundingBox": filter_with_geo_bounding_box,
@ -1095,6 +1107,7 @@ impl MultiSearchAggregator {
show_matches_position: _, show_matches_position: _,
filter: _, filter: _,
sort: _, sort: _,
distinct: _,
facets: _, facets: _,
highlight_pre_tag: _, highlight_pre_tag: _,
highlight_post_tag: _, highlight_post_tag: _,

View File

@ -124,6 +124,7 @@ impl From<FacetSearchQuery> for SearchQuery {
show_ranking_score_details: false, show_ranking_score_details: false,
filter, filter,
sort: None, sort: None,
distinct: None,
facets: None, facets: None,
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),

View File

@ -63,6 +63,8 @@ pub struct SearchQueryGet {
filter: Option<String>, filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
sort: Option<String>, sort: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchDistinct>)]
distinct: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
show_matches_position: Param<bool>, show_matches_position: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
@ -161,6 +163,7 @@ impl From<SearchQueryGet> for SearchQuery {
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
filter, filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
distinct: other.distinct,
show_matches_position: other.show_matches_position.0, show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0, show_ranking_score: other.show_ranking_score.0,
show_ranking_score_details: other.show_ranking_score_details.0, show_ranking_score_details: other.show_ranking_score_details.0,

View File

@ -78,6 +78,8 @@ pub struct SearchQuery {
pub filter: Option<Value>, pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)] #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>, pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)] #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>, pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())] #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@ -153,6 +155,7 @@ impl fmt::Debug for SearchQuery {
show_ranking_score_details, show_ranking_score_details,
filter, filter,
sort, sort,
distinct,
facets, facets,
highlight_pre_tag, highlight_pre_tag,
highlight_post_tag, highlight_post_tag,
@ -202,6 +205,9 @@ impl fmt::Debug for SearchQuery {
if let Some(sort) = sort { if let Some(sort) = sort {
debug.field("sort", &sort); debug.field("sort", &sort);
} }
if let Some(distinct) = distinct {
debug.field("distinct", &distinct);
}
if let Some(facets) = facets { if let Some(facets) = facets {
debug.field("facets", &facets); debug.field("facets", &facets);
} }
@ -395,6 +401,8 @@ pub struct SearchQueryWithIndex {
pub filter: Option<Value>, pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)] #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>, pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)] #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>, pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())] #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@ -431,6 +439,7 @@ impl SearchQueryWithIndex {
show_matches_position, show_matches_position,
filter, filter,
sort, sort,
distinct,
facets, facets,
highlight_pre_tag, highlight_pre_tag,
highlight_post_tag, highlight_post_tag,
@ -459,6 +468,7 @@ impl SearchQueryWithIndex {
show_matches_position, show_matches_position,
filter, filter,
sort, sort,
distinct,
facets, facets,
highlight_pre_tag, highlight_pre_tag,
highlight_post_tag, highlight_post_tag,
@ -729,6 +739,10 @@ fn prepare_search<'t>(
search.ranking_score_threshold(ranking_score_threshold.0); search.ranking_score_threshold(ranking_score_threshold.0);
} }
if let Some(distinct) = &query.distinct {
search.distinct(distinct.clone());
}
match search_kind { match search_kind {
SearchKind::KeywordOnly => { SearchKind::KeywordOnly => {
if let Some(q) = &query.q { if let Some(q) = &query.q {
@ -882,6 +896,7 @@ pub fn perform_search(
matching_strategy: _, matching_strategy: _,
attributes_to_search_on: _, attributes_to_search_on: _,
filter: _, filter: _,
distinct: _,
} = query; } = query;
let format = AttributesFormat { let format = AttributesFormat {

View File

@ -107,6 +107,39 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
]) ])
}); });
// Fixture of four products, each carrying a nested `color` object with a `main`
// and a `pattern` key. Documents 1 to 3 share the same `product_id` but have
// three different `color.main` values; document 4 has another `product_id` and
// the same `color.main` ("Blue") as document 3.
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 1,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": { "main": "Brown", "pattern": "stripped" },
},
{
"id": 2,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": { "main": "Black", "pattern": "stripped" },
},
{
"id": 3,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": { "main": "Blue", "pattern": "used" },
},
{
"id": 4,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": { "main": "Blue", "pattern": "stripped" },
}
])
});
static DOCUMENT_PRIMARY_KEY: &str = "id"; static DOCUMENT_PRIMARY_KEY: &str = "id";
static DOCUMENT_DISTINCT_KEY: &str = "product_id"; static DOCUMENT_DISTINCT_KEY: &str = "product_id";
@ -239,3 +272,35 @@ async fn distinct_search_with_pagination_no_ranking() {
snapshot!(response["totalPages"], @"2"); snapshot!(response["totalPages"], @"2");
snapshot!(response["totalHits"], @"6"); snapshot!(response["totalHits"], @"6");
} }
/// Searches with `distinct` set to the nested, filterable `color.main` attribute and
/// checks the returned hits and pagination fields.
#[actix_rt::test]
async fn distinct_at_search_time() {
    let server = Server::new().await;
    let index = server.index("tamo");

    index.add_documents(NESTED_DOCUMENTS.clone(), Some(DOCUMENT_PRIMARY_KEY)).await;

    // `color.main` must be filterable before it can be used as a search-time distinct.
    let (settings_task, _) =
        index.update_settings_filterable_attributes(json!(["color.main"])).await;
    let settings_task = index.wait_task(settings_task.uid()).await;
    snapshot!(settings_task, name: "succeed");

    // Collects the primary key of every hit, in response order, as strings.
    // Panics with the pretty-printed response when there is no `hits` array.
    fn get_hits(response: &Value) -> Vec<String> {
        let hits = match response["hits"].as_array() {
            Some(hits) => hits,
            None => panic!("{}", &serde_json::to_string_pretty(&response).unwrap()),
        };

        let mut ids = Vec::with_capacity(hits.len());
        for hit in hits {
            ids.push(hit[DOCUMENT_PRIMARY_KEY].as_number().unwrap().to_string());
        }
        ids
    }

    let search_body = json!({"page": 1, "hitsPerPage": 3, "distinct": "color.main"});
    let (response, code) = index.search_post(search_body).await;
    let hits = get_hits(&response);

    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"3");
    snapshot!(format!("{:?}", hits), @r###"["1", "2", "3"]"###);
    snapshot!(response["page"], @"1");
    snapshot!(response["totalPages"], @"1");
    snapshot!(response["totalHits"], @"3");
}

View File

@ -1140,3 +1140,66 @@ async fn search_on_unknown_field_plus_joker() {
) )
.await; .await;
} }
// Checks the `invalid_search_distinct` error responses produced by the `distinct`
// search parameter: a non-filterable attribute (with no, some, and partially hidden
// filterable attributes configured) and a non-string value.
#[actix_rt::test]
async fn distinct_at_search_time() {
let server = Server::new().await;
let index = server.index("tamo");
let (task, _) = index.create(None).await;
let task = index.wait_task(task.uid()).await;
snapshot!(task, name: "task-succeed");
// Case 1: the index has no filterable attributes at all.
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
// Case 2: filterable attributes exist but do not include the requested one;
// the message lists the available filterable attributes.
let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
index.wait_task(task.uid()).await;
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
// Case 3: only `color` is displayed, so `machin` is no longer named in the
// message and a `<..hidden-attributes>` marker appears in its place.
let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
index.wait_task(task.uid()).await;
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
// Case 4: `distinct` must be a string; a boolean is rejected with a type error
// that carries the same error code.
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.distinct`: expected a string, but found a boolean: `true`",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
}

View File

@ -0,0 +1,20 @@
---
source: meilisearch/tests/search/distinct.rs
---
{
"uid": 1,
"indexUid": "tamo",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"filterableAttributes": [
"color.main"
]
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@ -0,0 +1,18 @@
---
source: meilisearch/tests/search/errors.rs
---
{
"uid": 0,
"indexUid": "tamo",
"status": "succeeded",
"type": "indexCreation",
"canceledBy": null,
"details": {
"primaryKey": null
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@ -59,6 +59,7 @@ fn main() -> Result<(), Box<dyn Error>> {
false, false,
universe, universe,
&None, &None,
&None,
GeoSortStrategy::default(), GeoSortStrategy::default(),
0, 0,
20, 20,

View File

@ -134,6 +134,17 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
} }
)] )]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool }, InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
// Error for a search-time `distinct` attribute that is not filterable.
// `field` is the rejected attribute and `valid_fields` the filterable attributes
// listed in the message; when `valid_fields` is empty the message instead states
// that no filterable attributes are configured. When `hidden_fields` is true, a
// `<..hidden-attributes>` marker is appended to the list.
#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
.field,
match .valid_fields.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
),
}
)]
InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
#[error("Attribute `{}` is not facet-searchable. {}", #[error("Attribute `{}` is not facet-searchable. {}",
.field, .field,
match .valid_fields.is_empty() { match .valid_fields.is_empty() {

View File

@ -159,6 +159,7 @@ impl<'a> Search<'a> {
offset: 0, offset: 0,
limit: self.limit + self.offset, limit: self.limit + self.offset,
sort_criteria: self.sort_criteria.clone(), sort_criteria: self.sort_criteria.clone(),
distinct: self.distinct.clone(),
searchable_attributes: self.searchable_attributes, searchable_attributes: self.searchable_attributes,
geo_strategy: self.geo_strategy, geo_strategy: self.geo_strategy,
terms_matching_strategy: self.terms_matching_strategy, terms_matching_strategy: self.terms_matching_strategy,

View File

@ -11,8 +11,8 @@ use self::new::{execute_vector_search, PartialSearchResult};
use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder; use crate::vector::Embedder;
use crate::{ use crate::{
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index,
SearchContext, TimeBudget, Result, SearchContext, TimeBudget, UserError,
}; };
// Building these factories is not free. // Building these factories is not free.
@ -40,6 +40,7 @@ pub struct Search<'a> {
offset: usize, offset: usize,
limit: usize, limit: usize,
sort_criteria: Option<Vec<AscDesc>>, sort_criteria: Option<Vec<AscDesc>>,
distinct: Option<String>,
searchable_attributes: Option<&'a [String]>, searchable_attributes: Option<&'a [String]>,
geo_strategy: new::GeoSortStrategy, geo_strategy: new::GeoSortStrategy,
terms_matching_strategy: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
@ -61,6 +62,7 @@ impl<'a> Search<'a> {
offset: 0, offset: 0,
limit: 20, limit: 20,
sort_criteria: None, sort_criteria: None,
distinct: None,
searchable_attributes: None, searchable_attributes: None,
geo_strategy: new::GeoSortStrategy::default(), geo_strategy: new::GeoSortStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(),
@ -105,6 +107,11 @@ impl<'a> Search<'a> {
self self
} }
/// Sets the attribute to use as the distinct attribute for this search.
///
/// The name is stored as given; this setter performs no validation.
/// Returns `self` so that calls can be chained.
pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> {
    self.distinct = Option::from(distinct);
    self
}
pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> { pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
self.searchable_attributes = Some(searchable); self.searchable_attributes = Some(searchable);
self self
@ -169,6 +176,19 @@ impl<'a> Search<'a> {
ctx.attributes_to_search_on(searchable_attributes)?; ctx.attributes_to_search_on(searchable_attributes)?;
} }
if let Some(distinct) = &self.distinct {
let filterable_fields = ctx.index.filterable_fields(ctx.txn)?;
if !crate::is_faceted(distinct, &filterable_fields) {
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?;
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
field: distinct.clone(),
valid_fields,
hidden_fields,
}));
}
}
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?; let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
let PartialSearchResult { let PartialSearchResult {
located_query_terms, located_query_terms,
@ -185,6 +205,7 @@ impl<'a> Search<'a> {
self.scoring_strategy, self.scoring_strategy,
universe, universe,
&self.sort_criteria, &self.sort_criteria,
&self.distinct,
self.geo_strategy, self.geo_strategy,
self.offset, self.offset,
self.limit, self.limit,
@ -202,6 +223,7 @@ impl<'a> Search<'a> {
self.exhaustive_number_hits, self.exhaustive_number_hits,
universe, universe,
&self.sort_criteria, &self.sort_criteria,
&self.distinct,
self.geo_strategy, self.geo_strategy,
self.offset, self.offset,
self.limit, self.limit,
@ -238,6 +260,7 @@ impl fmt::Debug for Search<'_> {
offset, offset,
limit, limit,
sort_criteria, sort_criteria,
distinct,
searchable_attributes, searchable_attributes,
geo_strategy: _, geo_strategy: _,
terms_matching_strategy, terms_matching_strategy,
@ -257,6 +280,7 @@ impl fmt::Debug for Search<'_> {
.field("offset", offset) .field("offset", offset)
.field("limit", limit) .field("limit", limit)
.field("sort_criteria", sort_criteria) .field("sort_criteria", sort_criteria)
.field("distinct", distinct)
.field("searchable_attributes", searchable_attributes) .field("searchable_attributes", searchable_attributes)
.field("terms_matching_strategy", terms_matching_strategy) .field("terms_matching_strategy", terms_matching_strategy)
.field("scoring_strategy", scoring_strategy) .field("scoring_strategy", scoring_strategy)

View File

@ -22,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>, ctx: &mut SearchContext<'ctx>,
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>, mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
query: &Q, query: &Q,
distinct: Option<&str>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
from: usize, from: usize,
length: usize, length: usize,
@ -34,7 +35,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger.ranking_rules(&ranking_rules); logger.ranking_rules(&ranking_rules);
logger.initial_universe(universe); logger.initial_universe(universe);
let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? { let distinct_field = match distinct {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
let distinct_fid = if let Some(field) = distinct_field {
ctx.index.fields_ids_map(ctx.txn)?.id(field) ctx.index.fields_ids_map(ctx.txn)?.id(field)
} else { } else {
None None

View File

@ -516,6 +516,7 @@ mod tests {
false, false,
universe, universe,
&None, &None,
&None,
crate::search::new::GeoSortStrategy::default(), crate::search::new::GeoSortStrategy::default(),
0, 0,
100, 100,

View File

@ -568,6 +568,7 @@ pub fn execute_vector_search(
scoring_strategy: ScoringStrategy, scoring_strategy: ScoringStrategy,
universe: RoaringBitmap, universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy, geo_strategy: geo_sort::Strategy,
from: usize, from: usize,
length: usize, length: usize,
@ -598,6 +599,7 @@ pub fn execute_vector_search(
ctx, ctx,
ranking_rules, ranking_rules,
&PlaceholderQuery, &PlaceholderQuery,
distinct.as_deref(),
&universe, &universe,
from, from,
length, length,
@ -627,6 +629,7 @@ pub fn execute_search(
exhaustive_number_hits: bool, exhaustive_number_hits: bool,
mut universe: RoaringBitmap, mut universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy, geo_strategy: geo_sort::Strategy,
from: usize, from: usize,
length: usize, length: usize,
@ -717,6 +720,7 @@ pub fn execute_search(
ctx, ctx,
ranking_rules, ranking_rules,
&graph, &graph,
distinct.as_deref(),
&universe, &universe,
from, from,
length, length,
@ -732,6 +736,7 @@ pub fn execute_search(
ctx, ctx,
ranking_rules, ranking_rules,
&PlaceholderQuery, &PlaceholderQuery,
distinct.as_deref(),
&universe, &universe,
from, from,
length, length,
@ -748,7 +753,12 @@ pub fn execute_search(
// The candidates is the universe unless the exhaustive number of hits // The candidates is the universe unless the exhaustive number of hits
// is requested and a distinct attribute is set. // is requested and a distinct attribute is set.
if exhaustive_number_hits { if exhaustive_number_hits {
if let Some(f) = ctx.index.distinct_field(ctx.txn)? { let distinct_field = match distinct.as_deref() {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
if let Some(f) = distinct_field {
if let Some(distinct_fid) = fields_ids_map.id(f) { if let Some(distinct_fid) = fields_ids_map.id(f) {
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
} }

View File

@ -205,8 +205,18 @@ fn create_index() -> TempIndex {
index index
} }
fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> { fn verify_distinct(
let vs = collect_field_values(index, txn, index.distinct_field(txn).unwrap().unwrap(), docids); index: &Index,
txn: &RoTxn,
distinct: Option<&str>,
docids: &[u32],
) -> Vec<String> {
let vs = collect_field_values(
index,
txn,
distinct.or_else(|| index.distinct_field(txn).unwrap()).unwrap(),
docids,
);
let mut unique = HashSet::new(); let mut unique = HashSet::new();
for v in vs.iter() { for v in vs.iter() {
@ -223,12 +233,49 @@ fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> {
// Placeholder search using the search-time `distinct` parameter: `letter` is made
// filterable, the index-level distinct field is reset, and the search itself sets
// `distinct` to `letter`.
// NOTE(review): the sibling `test_distinct_at_search_placeholder_no_ranking_rules`
// never calls `Search::distinct` and relies on the index-level distinct field, so
// the two test names look swapped; confirm and rename.
fn test_distinct_placeholder_no_ranking_rules() { fn test_distinct_placeholder_no_ranking_rules() {
let index = create_index(); let index = create_index();
// Set the letter as filterable and unset the distinct attribute.
index
.update_settings(|s| {
s.set_filterable_fields(hashset! { S("letter") });
s.reset_distinct_field();
})
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.distinct(S("letter"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
// Pass the attribute explicitly: the index-level distinct field was reset above.
let distinct_values = verify_distinct(&index, &txn, Some("letter"), &documents_ids);
insta::assert_debug_snapshot!(distinct_values, @r###"
[
"\"A\"",
"\"B\"",
"\"C\"",
"\"D\"",
"\"E\"",
"\"F\"",
"\"G\"",
"\"H\"",
"\"I\"",
"__does_not_exist__",
"__does_not_exist__",
"__does_not_exist__",
]
"###);
}
#[test]
fn test_distinct_at_search_placeholder_no_ranking_rules() {
let index = create_index();
let txn = index.read_txn().unwrap(); let txn = index.read_txn().unwrap();
let s = Search::new(&txn, &index); let s = Search::new(&txn, &index);
let SearchResult { documents_ids, .. } = s.execute().unwrap(); let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
let distinct_values = verify_distinct(&index, &txn, &documents_ids); let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
insta::assert_debug_snapshot!(distinct_values, @r###" insta::assert_debug_snapshot!(distinct_values, @r###"
[ [
"\"A\"", "\"A\"",
@ -263,7 +310,7 @@ fn test_distinct_placeholder_sort() {
let SearchResult { documents_ids, .. } = s.execute().unwrap(); let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]");
let distinct_values = verify_distinct(&index, &txn, &documents_ids); let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
insta::assert_debug_snapshot!(distinct_values, @r###" insta::assert_debug_snapshot!(distinct_values, @r###"
[ [
"\"E\"", "\"E\"",
@ -303,7 +350,7 @@ fn test_distinct_placeholder_sort() {
let SearchResult { documents_ids, .. } = s.execute().unwrap(); let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]");
let distinct_values = verify_distinct(&index, &txn, &documents_ids); let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
insta::assert_debug_snapshot!(distinct_values, @r###" insta::assert_debug_snapshot!(distinct_values, @r###"
[ [
"\"I\"", "\"I\"",
@ -346,7 +393,7 @@ fn test_distinct_placeholder_sort() {
let SearchResult { documents_ids, .. } = s.execute().unwrap(); let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]");
let distinct_values = verify_distinct(&index, &txn, &documents_ids); let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
insta::assert_debug_snapshot!(distinct_values, @r###" insta::assert_debug_snapshot!(distinct_values, @r###"
[ [
"\"I\"", "\"I\"",
@ -399,7 +446,7 @@ fn test_distinct_words() {
let SearchResult { documents_ids, .. } = s.execute().unwrap(); let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]");
let distinct_values = verify_distinct(&index, &txn, &documents_ids); let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
insta::assert_debug_snapshot!(distinct_values, @r###" insta::assert_debug_snapshot!(distinct_values, @r###"
[ [
"\"A\"", "\"A\"",
@ -453,7 +500,7 @@ fn test_distinct_sort_words() {
let SearchResult { documents_ids, .. } = s.execute().unwrap(); let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]");
let distinct_values = verify_distinct(&index, &txn, &documents_ids); let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
insta::assert_debug_snapshot!(distinct_values, @r###" insta::assert_debug_snapshot!(distinct_values, @r###"
[ [
"\"I\"", "\"I\"",
@ -549,7 +596,7 @@ fn test_distinct_typo() {
let SearchResult { documents_ids, .. } = s.execute().unwrap(); let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]");
let distinct_values = verify_distinct(&index, &txn, &documents_ids); let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
insta::assert_debug_snapshot!(distinct_values, @r###" insta::assert_debug_snapshot!(distinct_values, @r###"
[ [
"\"B\"", "\"B\"",