diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index 5a28ab58a..a568e6d27 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -36,6 +36,8 @@ That is we find the documents where either: - OR: `pretty` is 2-close to `house` AND `house` is 1-close to `by` */ +use std::ops::ControlFlow; + use roaring::RoaringBitmap; use super::interner::MappedInterner; @@ -263,7 +265,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase graph.remove_edges_with_condition(condition); // 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore edge_docids_cache.cache.remove(&condition); - return Ok(()); + return Ok(ControlFlow::Continue(())); } path_docids &= edge_docids; @@ -287,14 +289,18 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase } // We should maybe instead try to compute: // 0th & nth & 1st & n-1th & 2nd & etc... - return Ok(()); + return Ok(ControlFlow::Continue(())); } } bucket |= &path_docids; // Reduce the size of the universe so that we can more optimistically discard candidate paths universe -= path_docids; - // TODO: if the universe is empty, stop iterating - Ok(()) + + if universe.is_empty() { + Ok(ControlFlow::Break(())) + } else { + Ok(ControlFlow::Continue(())) + } }, )?; diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs index fb5a296bd..f3ce3f7e9 100644 --- a/milli/src/search/new/logger/detailed.rs +++ b/milli/src/search/new/logger/detailed.rs @@ -10,7 +10,7 @@ use crate::search::new::interner::{Interned, MappedInterner}; use crate::search::new::query_graph::QueryNodeData; use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm}; use crate::search::new::ranking_rule_graph::{ - DeadEndPathCache, Edge, EdgeCondition, ProximityEdge, ProximityGraph, RankingRuleGraph, + DeadEndPathCache, Edge, EdgeCondition, ProximityCondition, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoEdge, TypoGraph, }; use crate::search::new::small_bitmap::SmallBitmap; @@ -46,7 +46,7 @@ pub enum SearchEvents { paths: Vec>, empty_paths_cache: DeadEndPathCache, universe: RoaringBitmap, - distances: MappedInterner)>, QueryNode>, + distances: MappedInterner)>, QueryNode>, cost: u16, }, TypoState { @@ -172,7 +172,7 @@ impl SearchLogger for DetailedSearchLogger { paths_map: &[Vec], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner)>, QueryNode>, cost: u16, ) { self.events.push(SearchEvents::ProximityState { diff --git a/milli/src/search/new/logger/mod.rs b/milli/src/search/new/logger/mod.rs index ff500d4b8..c2e9bca80 100644 --- a/milli/src/search/new/logger/mod.rs +++ b/milli/src/search/new/logger/mod.rs @@ -6,7 +6,7 @@ use roaring::RoaringBitmap; use super::interner::MappedInterner; use super::query_graph::QueryNode; use super::ranking_rule_graph::{ - DeadEndPathCache, ProximityEdge, ProximityGraph, RankingRuleGraph, TypoEdge, TypoGraph, + DeadEndPathCache, ProximityCondition, ProximityGraph, RankingRuleGraph, TypoEdge, TypoGraph, }; use super::small_bitmap::SmallBitmap; use super::{RankingRule, RankingRuleQueryTrait}; @@ -68,7 +68,7 @@ pub trait SearchLogger { paths: &[Vec], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner)>, QueryNode>, cost: u16, ); @@ -139,7 +139,7 @@ impl SearchLogger for DefaultSearchLogger { _paths_map: &[Vec], _empty_paths_cache: &DeadEndPathCache, _universe: &RoaringBitmap, - _distances: &MappedInterner)>, QueryNode>, + _distances: &MappedInterner)>, QueryNode>, _cost: u16, ) { } diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 11420545c..02cd7b1de 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -303,16 +303,16 @@ mod tests { let mut ctx = SearchContext::new(&index, &txn); let results = execute_search( &mut ctx, - // "which a the releases from poison by the government", + "releases from poison by the government", // "sun flower s are the best", - "zero config", + // "zero config", TermsMatchingStrategy::Last, None, 0, 20, &mut DefaultSearchLogger, &mut DefaultSearchLogger, - //&mut logger, + // &mut logger, ) .unwrap(); @@ -359,9 +359,9 @@ mod tests { let start = Instant::now(); let mut s = Search::new(&txn, &index); - s.query("which a the releases from poison by the government"); + s.query("releases from poison by the government"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased); + // s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased); let docs = s.execute().unwrap(); let elapsed = start.elapsed(); diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index 597aff661..cc3bfd7b4 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -2,6 +2,7 @@ use std::collections::btree_map::Entry; use std::collections::{BTreeMap, VecDeque}; +use std::ops::ControlFlow; use super::empty_paths_cache::DeadEndPathCache; use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; @@ -23,7 +24,7 @@ impl RankingRuleGraph { cost: u16, all_distances: &MappedInterner)>, QueryNode>, empty_paths_cache: &mut DeadEndPathCache, - mut visit: impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache) -> Result<()>, + mut visit: impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache) -> Result>, ) -> Result<()> { let _ = self.visit_paths_of_cost_rec( from, @@ -43,7 +44,7 @@ impl RankingRuleGraph { cost: u16, all_distances: &MappedInterner)>, QueryNode>, empty_paths_cache: &mut DeadEndPathCache, - visit: &mut impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache) -> Result<()>, + visit: &mut impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache) -> Result>, prev_conditions: &mut Vec, cur_path: &mut SmallBitmap, forbidden_conditions: &mut SmallBitmap, @@ -60,7 +61,11 @@ impl RankingRuleGraph { EdgeCondition::Unconditional => { if edge.dest_node == self.query_graph.end_node { any_valid = true; - visit(prev_conditions, self, empty_paths_cache)?; + let control_flow = visit(prev_conditions, self, empty_paths_cache)?; + match control_flow { + ControlFlow::Continue(_) => {} + ControlFlow::Break(_) => return Ok(true), + } true } else { self.visit_paths_of_cost_rec( @@ -101,7 +106,11 @@ impl RankingRuleGraph { ); let next_any_valid = if edge.dest_node == self.query_graph.end_node { any_valid = true; - visit(prev_conditions, self, empty_paths_cache)?; + let control_flow = visit(prev_conditions, self, empty_paths_cache)?; + match control_flow { + ControlFlow::Continue(_) => {} + ControlFlow::Break(_) => return Ok(true), + } true } else { self.visit_paths_of_cost_rec( diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index 7b82dc0a1..4e0384ae0 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -20,7 +20,7 @@ use std::hash::Hash; pub use edge_docids_cache::EdgeConditionDocIdsCache; pub use empty_paths_cache::DeadEndPathCache; -pub use proximity::{ProximityEdge, ProximityGraph}; +pub use proximity::{ProximityCondition, ProximityGraph}; use roaring::RoaringBitmap; pub use typo::{TypoEdge, TypoGraph}; diff --git a/milli/src/search/new/ranking_rule_graph/proximity/build.rs b/milli/src/search/new/ranking_rule_graph/proximity/build.rs index 556b3cb2b..c7eaa5d0c 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/build.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/build.rs @@ -1,7 +1,7 @@ #![allow(clippy::too_many_arguments)] use std::collections::BTreeMap; -use super::ProximityEdge; +use super::ProximityCondition; use crate::search::new::db_cache::DatabaseCache; use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_graph::QueryNodeData; @@ -37,10 +37,10 @@ fn first_word_of_term_iter<'t>( pub fn build_edges<'ctx>( ctx: &mut SearchContext<'ctx>, - conditions_interner: &mut DedupInterner, + conditions_interner: &mut DedupInterner, from_node: &QueryNode, to_node: &QueryNode, -) -> Result)>> { +) -> Result)>> { let SearchContext { index, txn, @@ -51,24 +51,33 @@ pub fn build_edges<'ctx>( term_docids: _, } = ctx; - let (left_term, left_end_position) = match &from_node.data { - QueryNodeData::Term(LocatedQueryTerm { value, positions }) => { - (term_interner.get(*value), *positions.end()) - } - QueryNodeData::Deleted => return Ok(vec![]), - QueryNodeData::Start => return Ok(vec![(0, EdgeCondition::Unconditional)]), - QueryNodeData::End => return Ok(vec![]), - }; - let right_term = match &to_node.data { QueryNodeData::End => return Ok(vec![(0, EdgeCondition::Unconditional)]), QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]), QueryNodeData::Term(term) => term, }; - let LocatedQueryTerm { value: right_value, positions: right_positions } = right_term; + + let LocatedQueryTerm { value: right_term_interned, positions: right_positions } = right_term; let (right_term, right_start_position, right_ngram_length) = - (term_interner.get(*right_value), *right_positions.start(), right_positions.len()); + (term_interner.get(*right_term_interned), *right_positions.start(), right_positions.len()); + + let (left_term, left_end_position) = match &from_node.data { + QueryNodeData::Term(LocatedQueryTerm { value, positions }) => { + (term_interner.get(*value), *positions.end()) + } + QueryNodeData::Deleted => return Ok(vec![]), + QueryNodeData::Start => { + return Ok(vec![( + (right_ngram_length - 1) as u8, + EdgeCondition::Conditional( + conditions_interner + .insert(ProximityCondition::Term { term: *right_term_interned }), + ), + )]) + } + QueryNodeData::End => return Ok(vec![]), + }; if left_end_position + 1 != right_start_position { // We want to ignore this pair of terms @@ -77,7 +86,12 @@ pub fn build_edges<'ctx>( // `flowers` is removed by the `words` ranking rule. // The remaining query graph represents `the sun .. are beautiful` // but `sun` and `are` have no proximity condition between them - return Ok(vec![(0, EdgeCondition::Unconditional)]); + return Ok(vec![( + (right_ngram_length - 1) as u8, + EdgeCondition::Conditional( + conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }), + ), + )]); } let mut cost_proximity_word_pairs = BTreeMap::>>::new(); @@ -121,24 +135,30 @@ pub fn build_edges<'ctx>( } } - let mut new_edges = - cost_proximity_word_pairs - .into_iter() - .flat_map(|(cost, proximity_word_pairs)| { - let mut edges = vec![]; - for (proximity, word_pairs) in proximity_word_pairs { - edges.push(( - cost, - EdgeCondition::Conditional(conditions_interner.insert(ProximityEdge { + let mut new_edges = cost_proximity_word_pairs + .into_iter() + .flat_map(|(cost, proximity_word_pairs)| { + let mut edges = vec![]; + for (proximity, word_pairs) in proximity_word_pairs { + edges.push(( + cost, + EdgeCondition::Conditional(conditions_interner.insert( + ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice(), proximity, - })), - )) - } - edges - }) - .collect::>(); - new_edges.push((8 + (right_ngram_length - 1) as u8, EdgeCondition::Unconditional)); + }, + )), + )) + } + edges + }) + .collect::>(); + new_edges.push(( + 8 + (right_ngram_length - 1) as u8, + EdgeCondition::Conditional( + conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }), + ), + )); Ok(new_edges) } diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs index 0acee0329..1123692f3 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs @@ -1,16 +1,39 @@ use roaring::RoaringBitmap; -use super::{ProximityEdge, WordPair}; +use super::{ProximityCondition, WordPair}; use crate::search::new::SearchContext; use crate::{CboRoaringBitmapCodec, Result}; pub fn compute_docids<'ctx>( ctx: &mut SearchContext<'ctx>, - edge: &ProximityEdge, + edge: &ProximityCondition, universe: &RoaringBitmap, ) -> Result { - let SearchContext { index, txn, db_cache, word_interner, .. } = ctx; - let ProximityEdge { pairs, proximity } = edge; + let SearchContext { + index, + txn, + db_cache, + word_interner, + term_docids, + phrase_interner, + term_interner, + } = ctx; + let (pairs, proximity) = match edge { + ProximityCondition::Term { term } => { + return term_docids + .get_query_term_docids( + index, + txn, + db_cache, + word_interner, + term_interner, + phrase_interner, + *term, + ) + .cloned() + } + ProximityCondition::Pairs { pairs, proximity } => (pairs, proximity), + }; let mut pair_docids = RoaringBitmap::new(); for pair in pairs.iter() { let pair = match pair { diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs index 2d226cfc7..427a1e904 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs @@ -4,15 +4,15 @@ pub mod compute_docids; use roaring::RoaringBitmap; use super::empty_paths_cache::DeadEndPathCache; -use super::{EdgeCondition, RankingRuleGraphTrait}; +use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; use crate::search::new::logger::SearchLogger; -use crate::search::new::query_term::Phrase; +use crate::search::new::query_term::{Phrase, QueryTerm}; use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::{QueryGraph, QueryNode, SearchContext}; use crate::Result; -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum WordPair { Words { phrases: Vec>, @@ -31,27 +31,33 @@ pub enum WordPair { } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct ProximityEdge { - pairs: Box<[WordPair]>, - proximity: u8, +pub enum ProximityCondition { + Term { term: Interned }, + Pairs { pairs: Box<[WordPair]>, proximity: u8 }, } pub enum ProximityGraph {} impl RankingRuleGraphTrait for ProximityGraph { - type EdgeCondition = ProximityEdge; + type EdgeCondition = ProximityCondition; fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String { - let ProximityEdge { pairs, proximity } = edge; - format!(", prox {proximity}, {} pairs", pairs.len()) + match edge { + ProximityCondition::Term { term } => { + format!("term {term}") + } + ProximityCondition::Pairs { pairs, proximity } => { + format!("prox {proximity}, {} pairs", pairs.len()) + } + } } fn resolve_edge_condition<'ctx>( ctx: &mut SearchContext<'ctx>, - edge: &Self::EdgeCondition, + condition: &Self::EdgeCondition, universe: &RoaringBitmap, ) -> Result { - compute_docids::compute_docids(ctx, edge, universe) + compute_docids::compute_docids(ctx, condition, universe) } fn build_edges<'ctx>( @@ -64,11 +70,11 @@ impl RankingRuleGraphTrait for ProximityGraph { } fn log_state( - graph: &super::RankingRuleGraph, + graph: &RankingRuleGraph, paths: &[Vec], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner)>, QueryNode>, cost: u16, logger: &mut dyn SearchLogger, ) {