diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index d8f881b07..f35d024cc 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -40,12 +40,25 @@ use roaring::RoaringBitmap; use super::logger::SearchLogger; use super::ranking_rule_graph::{ - EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait, + EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, ProximityGraph, }; use super::small_bitmap::SmallBitmap; use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; use crate::Result; +pub type Proximity = GraphBasedRankingRule; +impl Default for GraphBasedRankingRule { + fn default() -> Self { + Self::new("proximity".to_owned()) + } +} +pub type Typo = GraphBasedRankingRule; +impl Default for GraphBasedRankingRule { + fn default() -> Self { + Self::new("typo".to_owned()) + } +} + /// A generic graph-based ranking rule pub struct GraphBasedRankingRule { id: String, diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs index ef33bdbf9..ba443752d 100644 --- a/milli/src/search/new/logger/detailed.rs +++ b/milli/src/search/new/logger/detailed.rs @@ -98,7 +98,7 @@ impl SearchLogger for DetailedSearchLogger { fn initial_universe(&mut self, universe: &RoaringBitmap) { self.initial_universe = Some(universe.clone()); } - fn ranking_rules(&mut self, rr: &[&mut dyn RankingRule]) { + fn ranking_rules(&mut self, rr: &[Box>]) { self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect()); } diff --git a/milli/src/search/new/logger/mod.rs b/milli/src/search/new/logger/mod.rs index d4d64f844..c5f3e5351 100644 --- a/milli/src/search/new/logger/mod.rs +++ b/milli/src/search/new/logger/mod.rs @@ -19,7 +19,7 @@ pub trait SearchLogger { fn initial_universe(&mut self, universe: &RoaringBitmap); /// Logs the ranking rules used to perform the search query - fn ranking_rules(&mut self, rr: &[&mut dyn RankingRule]); + fn ranking_rules(&mut self, rr: &[Box>]); /// Logs the start of a ranking rule's iteration. fn start_iteration_ranking_rule<'transaction>( @@ -90,7 +90,7 @@ impl SearchLogger for DefaultSearchLogger { fn initial_universe(&mut self, _universe: &RoaringBitmap) {} - fn ranking_rules(&mut self, _rr: &[&mut dyn RankingRule]) {} + fn ranking_rules(&mut self, _rr: &[Box>]) {} fn start_iteration_ranking_rule<'transaction>( &mut self, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 125e2b1e0..18c51f4a4 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -17,7 +17,7 @@ mod words; pub use logger::{DefaultSearchLogger, SearchLogger}; -use std::collections::BTreeSet; +use std::collections::{BTreeSet, HashSet}; use charabia::Tokenize; use db_cache::DatabaseCache; @@ -28,10 +28,10 @@ use roaring::RoaringBitmap; use self::interner::Interner; use self::query_term::{Phrase, WordDerivations}; +use self::ranking_rules::PlaceholderQuery; use self::resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache}; -use crate::search::new::graph_based_ranking_rule::GraphBasedRankingRule; +use crate::search::new::graph_based_ranking_rule::{Proximity, Typo}; use crate::search::new::query_term::located_query_terms_from_string; -use crate::search::new::ranking_rule_graph::{ProximityGraph, TypoGraph}; use crate::search::new::words::Words; use crate::{Filter, Index, Result, TermsMatchingStrategy}; @@ -88,7 +88,9 @@ fn resolve_maximally_reduced_query_graph<'search>( TermsMatchingStrategy::All => vec![], }; // don't remove the first term - positions_to_remove.remove(0); + if !positions_to_remove.is_empty() { + positions_to_remove.remove(0); + } loop { if positions_to_remove.is_empty() { break; @@ -102,48 +104,172 @@ fn resolve_maximally_reduced_query_graph<'search>( Ok(docids) } +fn get_ranking_rules_for_placeholder_search<'search>( + ctx: &SearchContext<'search>, +) -> Result>>> { + // let sort = false; + // let mut asc = HashSet::new(); + // let mut desc = HashSet::new(); + let /*mut*/ ranking_rules: Vec>> = vec![]; + let settings_ranking_rules = ctx.index.criteria(ctx.txn)?; + for rr in settings_ranking_rules { + // Add Words before any of: typo, proximity, attribute, exactness + match rr { + crate::Criterion::Words + | crate::Criterion::Typo + | crate::Criterion::Attribute + | crate::Criterion::Proximity + | crate::Criterion::Exactness => continue, + crate::Criterion::Sort => todo!(), + crate::Criterion::Asc(_) => todo!(), + crate::Criterion::Desc(_) => todo!(), + } + } + Ok(ranking_rules) +} +fn get_ranking_rules_for_query_graph_search<'search>( + ctx: &SearchContext<'search>, + terms_matching_strategy: TermsMatchingStrategy, +) -> Result>>> { + // query graph search + let mut words = false; + let mut typo = false; + let mut proximity = false; + let sort = false; + let attribute = false; + let exactness = false; + let mut asc = HashSet::new(); + let mut desc = HashSet::new(); + + let mut ranking_rules: Vec>> = vec![]; + let settings_ranking_rules = ctx.index.criteria(ctx.txn)?; + for rr in settings_ranking_rules { + // Add Words before any of: typo, proximity, attribute, exactness + match rr { + crate::Criterion::Typo + | crate::Criterion::Attribute + | crate::Criterion::Proximity + | crate::Criterion::Exactness => { + if !words { + ranking_rules.push(Box::new(Words::new(terms_matching_strategy))); + words = true; + } + } + _ => {} + } + match rr { + crate::Criterion::Words => { + if words { + continue; + } + ranking_rules.push(Box::new(Words::new(terms_matching_strategy))); + words = true; + } + crate::Criterion::Typo => { + if typo { + continue; + } + typo = true; + ranking_rules.push(Box::::default()); + } + crate::Criterion::Proximity => { + if proximity { + continue; + } + proximity = true; + ranking_rules.push(Box::::default()); + } + crate::Criterion::Attribute => { + if attribute { + continue; + } + todo!(); + // attribute = false; + } + crate::Criterion::Sort => { + if sort { + continue; + } + todo!(); + // sort = false; + } + crate::Criterion::Exactness => { + if exactness { + continue; + } + todo!(); + // exactness = false; + } + crate::Criterion::Asc(field) => { + if asc.contains(&field) { + continue; + } + asc.insert(field); + todo!(); + } + crate::Criterion::Desc(field) => { + if desc.contains(&field) { + continue; + } + desc.insert(field); + todo!(); + } + } + } + Ok(ranking_rules) +} #[allow(clippy::too_many_arguments)] pub fn execute_search<'search>( ctx: &mut SearchContext<'search>, query: &str, + terms_matching_strategy: TermsMatchingStrategy, filters: Option, from: usize, length: usize, - logger: &mut dyn SearchLogger, + placeholder_search_logger: &mut dyn SearchLogger, + query_graph_logger: &mut dyn SearchLogger, ) -> Result> { assert!(!query.is_empty()); let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None)?; let graph = QueryGraph::from_query(ctx, query_terms)?; - logger.initial_query(&graph); - let universe = if let Some(filters) = filters { filters.evaluate(ctx.txn, ctx.index)? } else { ctx.index.documents_ids(ctx.txn)? }; - let universe = resolve_maximally_reduced_query_graph( - ctx, - &universe, - &graph, - TermsMatchingStrategy::Last, - logger, - )?; - // TODO: create ranking rules here + // TODO: other way to tell whether it is a placeholder search + // This way of doing things is not correct because if someone searches + // for a word that does not appear in any document, the word will be removed + // from the graph and thus its number of nodes will be == 2 + // But in that case, we should return no results. + // + // The search is a placeholder search only if there are no tokens? + if graph.nodes.len() > 2 { + let universe = resolve_maximally_reduced_query_graph( + ctx, + &universe, + &graph, + terms_matching_strategy, + query_graph_logger, + )?; - logger.initial_universe(&universe); - - let words = &mut Words::new(TermsMatchingStrategy::Last); - // let sort = &mut Sort::new(index, txn, "release_date".to_owned(), true)?; - let proximity = &mut GraphBasedRankingRule::::new("proximity".to_owned()); - let typo = &mut GraphBasedRankingRule::::new("typo".to_owned()); - // TODO: ranking rules given as argument - let ranking_rules: Vec<&mut dyn RankingRule<'search, QueryGraph>> = - vec![words, typo, proximity /*sort*/]; - - bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, logger) + let ranking_rules = get_ranking_rules_for_query_graph_search(ctx, terms_matching_strategy)?; + bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger) + } else { + let ranking_rules = get_ranking_rules_for_placeholder_search(ctx)?; + bucket_sort( + ctx, + ranking_rules, + &PlaceholderQuery, + &universe, + from, + length, + placeholder_search_logger, + ) + } } #[cfg(test)] @@ -182,10 +308,11 @@ mod tests { let results = execute_search( &mut ctx, "zero config", + TermsMatchingStrategy::Last, None, 0, 20, - // &mut DefaultSearchLogger, + &mut DefaultSearchLogger, &mut logger, ) .unwrap(); @@ -279,10 +406,11 @@ mod tests { let results = execute_search( &mut ctx, "releases from poison by the government", + TermsMatchingStrategy::Last, None, 0, 20, - // &mut DefaultSearchLogger, + &mut DefaultSearchLogger, &mut logger, ) .unwrap(); diff --git a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs index deac05502..3178cfe27 100644 --- a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs @@ -1,4 +1,4 @@ -use super::paths_map::PathSet; +use super::path_set::PathSet; use crate::search::new::small_bitmap::SmallBitmap; /// A cache which stores sufficient conditions for a path @@ -10,7 +10,7 @@ pub struct EmptyPathsCache { pub empty_edges: SmallBitmap, /// A set of path prefixes that resolve to no documents. pub empty_prefixes: PathSet, - /// A set of empty couple of edge indexes that resolve to no documents. + /// A set of empty couples of edge indexes that resolve to no documents. pub empty_couple_edges: Vec, } impl EmptyPathsCache { diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index 3f74a3cf5..143554c72 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -9,7 +9,7 @@ mod build; mod cheapest_paths; mod edge_docids_cache; mod empty_paths_cache; -mod paths_map; +mod path_set; /// Implementation of the `proximity` ranking rule mod proximity; diff --git a/milli/src/search/new/ranking_rule_graph/paths_map.rs b/milli/src/search/new/ranking_rule_graph/path_set.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/paths_map.rs rename to milli/src/search/new/ranking_rule_graph/path_set.rs diff --git a/milli/src/search/new/ranking_rules.rs b/milli/src/search/new/ranking_rules.rs index 350eed58f..57817fd7e 100644 --- a/milli/src/search/new/ranking_rules.rs +++ b/milli/src/search/new/ranking_rules.rs @@ -70,16 +70,15 @@ pub struct RankingRuleOutput { pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( ctx: &mut SearchContext<'search>, - mut ranking_rules: Vec<&mut dyn RankingRule<'search, Q>>, - query_graph: &Q, + mut ranking_rules: Vec>>, + query: &Q, universe: &RoaringBitmap, from: usize, length: usize, logger: &mut dyn SearchLogger, ) -> Result> { - logger.initial_query(query_graph); - logger.ranking_rules(&ranking_rules); + logger.initial_universe(universe); let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? { ctx.index.fields_ids_map(ctx.txn)?.id(field) @@ -92,8 +91,8 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( } let ranking_rules_len = ranking_rules.len(); - logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, universe); - ranking_rules[0].start_iteration(ctx, logger, universe, query_graph)?; + logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe); + ranking_rules[0].start_iteration(ctx, logger, universe, query)?; let mut ranking_rule_universes: Vec = vec![RoaringBitmap::default(); ranking_rules_len]; @@ -109,7 +108,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( assert!(ranking_rule_universes[cur_ranking_rule_index].is_empty()); logger.end_iteration_ranking_rule( cur_ranking_rule_index, - ranking_rules[cur_ranking_rule_index], + ranking_rules[cur_ranking_rule_index].as_ref(), &ranking_rule_universes[cur_ranking_rule_index], ); ranking_rule_universes[cur_ranking_rule_index].clear(); @@ -149,7 +148,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( // then just skip the bucket logger.skip_bucket_ranking_rule( cur_ranking_rule_index, - ranking_rules[cur_ranking_rule_index], + ranking_rules[cur_ranking_rule_index].as_ref(), &candidates, ); } else { @@ -159,7 +158,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( all_candidates.split_at(from - cur_offset); logger.skip_bucket_ranking_rule( cur_ranking_rule_index, - ranking_rules[cur_ranking_rule_index], + ranking_rules[cur_ranking_rule_index].as_ref(), &skipped_candidates.into_iter().collect(), ); let candidates = candidates @@ -186,7 +185,6 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( // anything, just extend the results and go back to the parent ranking rule. if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 { maybe_add_to_results!(&ranking_rule_universes[cur_ranking_rule_index]); - ranking_rule_universes[cur_ranking_rule_index].clear(); back!(); continue; } @@ -198,7 +196,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( logger.next_bucket_ranking_rule( cur_ranking_rule_index, - ranking_rules[cur_ranking_rule_index], + ranking_rules[cur_ranking_rule_index].as_ref(), &ranking_rule_universes[cur_ranking_rule_index], &next_bucket.candidates, ); @@ -218,7 +216,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( ranking_rule_universes[cur_ranking_rule_index] = next_bucket.candidates.clone(); logger.start_iteration_ranking_rule( cur_ranking_rule_index, - ranking_rules[cur_ranking_rule_index], + ranking_rules[cur_ranking_rule_index].as_ref(), &next_bucket.query, &ranking_rule_universes[cur_ranking_rule_index], );