From 0d2e7bcc130ce187721d275752c94ed9f02fb395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 30 Mar 2023 16:10:10 +0200 Subject: [PATCH] Implement the previous way for the exhaustive distinct candidates --- milli/src/search/mod.rs | 11 +++++++---- milli/src/search/new/mod.rs | 16 ++++++++++++++-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 734671990..08803b73f 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,3 +1,9 @@ +use std::fmt; + +use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; +use once_cell::sync::Lazy; +use roaring::bitmap::RoaringBitmap; + pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET}; pub use self::matches::{ FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords, @@ -5,10 +11,6 @@ pub use self::matches::{ use crate::{ execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext, }; -use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; -use once_cell::sync::Lazy; -use roaring::bitmap::RoaringBitmap; -use std::fmt; // Building these factories is not free. 
static LEVDIST0: Lazy = Lazy::new(|| LevBuilder::new(0, true)); @@ -112,6 +114,7 @@ impl<'a> Search<'a> { &mut ctx, &self.query, self.terms_matching_strategy, + self.exhaustive_number_hits, &self.filter, &self.sort_criteria, self.offset, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 4061df162..4d2805fef 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -37,6 +37,7 @@ use self::interner::Interner; use self::ranking_rules::{BoxRankingRule, RankingRule}; use self::resolve_query_graph::compute_query_graph_docids; use self::sort::Sort; +use crate::search::new::distinct::{apply_distinct_rule, DistinctOutput}; use crate::{ AscDesc, Filter, Index, MatchingWords, Member, Result, SearchResult, TermsMatchingStrategy, UserError, @@ -272,6 +273,7 @@ pub fn execute_search( ctx: &mut SearchContext, query: &Option, terms_matching_strategy: TermsMatchingStrategy, + exhaustive_number_hits: bool, filters: &Option, sort_criteria: &Option>, from: usize, @@ -333,11 +335,21 @@ pub fn execute_search( )? }; + // The candidates are the universe unless the exhaustive number of hits + // is requested and a distinct attribute is set. + let mut candidates = universe; + if exhaustive_number_hits { + if let Some(f) = ctx.index.distinct_field(ctx.txn)? { + if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) { + candidates = apply_distinct_rule(ctx, distinct_fid, &candidates)?.remaining; + } + } + } + Ok(SearchResult { // TODO: correct matching words matching_words: MatchingWords::default(), - // TODO: candidates with distinct - candidates: universe, + candidates, documents_ids, }) }