mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Fix bug in the proximity ranking rule for queries with ngrams
This commit is contained in:
parent
e9cf58d584
commit
c0cdaf9f53
@ -36,6 +36,8 @@ That is we find the documents where either:
|
||||
- OR: `pretty` is 2-close to `house` AND `house` is 1-close to `by`
|
||||
*/
|
||||
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::interner::MappedInterner;
|
||||
@ -263,7 +265,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
graph.remove_edges_with_condition(condition);
|
||||
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
||||
edge_docids_cache.cache.remove(&condition);
|
||||
return Ok(());
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
path_docids &= edge_docids;
|
||||
|
||||
@ -287,14 +289,18 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
}
|
||||
// We should maybe instead try to compute:
|
||||
// 0th & nth & 1st & n-1th & 2nd & etc...
|
||||
return Ok(());
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
}
|
||||
bucket |= &path_docids;
|
||||
// Reduce the size of the universe so that we can more optimistically discard candidate paths
|
||||
universe -= path_docids;
|
||||
// TODO: if the universe is empty, stop iterating
|
||||
Ok(())
|
||||
|
||||
if universe.is_empty() {
|
||||
Ok(ControlFlow::Break(()))
|
||||
} else {
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
},
|
||||
)?;
|
||||
|
||||
|
@ -10,7 +10,7 @@ use crate::search::new::interner::{Interned, MappedInterner};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
||||
use crate::search::new::ranking_rule_graph::{
|
||||
DeadEndPathCache, Edge, EdgeCondition, ProximityEdge, ProximityGraph, RankingRuleGraph,
|
||||
DeadEndPathCache, Edge, EdgeCondition, ProximityCondition, ProximityGraph, RankingRuleGraph,
|
||||
RankingRuleGraphTrait, TypoEdge, TypoGraph,
|
||||
};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
@ -46,7 +46,7 @@ pub enum SearchEvents {
|
||||
paths: Vec<Vec<u16>>,
|
||||
empty_paths_cache: DeadEndPathCache<ProximityGraph>,
|
||||
universe: RoaringBitmap,
|
||||
distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||
distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
},
|
||||
TypoState {
|
||||
@ -172,7 +172,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
paths_map: &[Vec<u16>],
|
||||
empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
) {
|
||||
self.events.push(SearchEvents::ProximityState {
|
||||
|
@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
|
||||
use super::interner::MappedInterner;
|
||||
use super::query_graph::QueryNode;
|
||||
use super::ranking_rule_graph::{
|
||||
DeadEndPathCache, ProximityEdge, ProximityGraph, RankingRuleGraph, TypoEdge, TypoGraph,
|
||||
DeadEndPathCache, ProximityCondition, ProximityGraph, RankingRuleGraph, TypoEdge, TypoGraph,
|
||||
};
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{RankingRule, RankingRuleQueryTrait};
|
||||
@ -68,7 +68,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
paths: &[Vec<u16>],
|
||||
empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
);
|
||||
|
||||
@ -139,7 +139,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
_paths_map: &[Vec<u16>],
|
||||
_empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
_universe: &RoaringBitmap,
|
||||
_distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||
_distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
_cost: u16,
|
||||
) {
|
||||
}
|
||||
|
@ -303,16 +303,16 @@ mod tests {
|
||||
let mut ctx = SearchContext::new(&index, &txn);
|
||||
let results = execute_search(
|
||||
&mut ctx,
|
||||
// "which a the releases from poison by the government",
|
||||
"releases from poison by the government",
|
||||
// "sun flower s are the best",
|
||||
"zero config",
|
||||
// "zero config",
|
||||
TermsMatchingStrategy::Last,
|
||||
None,
|
||||
0,
|
||||
20,
|
||||
&mut DefaultSearchLogger,
|
||||
&mut DefaultSearchLogger,
|
||||
//&mut logger,
|
||||
// &mut logger,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -359,9 +359,9 @@ mod tests {
|
||||
let start = Instant::now();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("which a the releases from poison by the government");
|
||||
s.query("releases from poison by the government");
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||
s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
||||
// s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
||||
let docs = s.execute().unwrap();
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use super::empty_paths_cache::DeadEndPathCache;
|
||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
@ -23,7 +24,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
cost: u16,
|
||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||
mut visit: impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<()>,
|
||||
mut visit: impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<ControlFlow<()>>,
|
||||
) -> Result<()> {
|
||||
let _ = self.visit_paths_of_cost_rec(
|
||||
from,
|
||||
@ -43,7 +44,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
cost: u16,
|
||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||
visit: &mut impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<()>,
|
||||
visit: &mut impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<ControlFlow<()>>,
|
||||
prev_conditions: &mut Vec<u16>,
|
||||
cur_path: &mut SmallBitmap<G::EdgeCondition>,
|
||||
forbidden_conditions: &mut SmallBitmap<G::EdgeCondition>,
|
||||
@ -60,7 +61,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
EdgeCondition::Unconditional => {
|
||||
if edge.dest_node == self.query_graph.end_node {
|
||||
any_valid = true;
|
||||
visit(prev_conditions, self, empty_paths_cache)?;
|
||||
let control_flow = visit(prev_conditions, self, empty_paths_cache)?;
|
||||
match control_flow {
|
||||
ControlFlow::Continue(_) => {}
|
||||
ControlFlow::Break(_) => return Ok(true),
|
||||
}
|
||||
true
|
||||
} else {
|
||||
self.visit_paths_of_cost_rec(
|
||||
@ -101,7 +106,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
);
|
||||
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
||||
any_valid = true;
|
||||
visit(prev_conditions, self, empty_paths_cache)?;
|
||||
let control_flow = visit(prev_conditions, self, empty_paths_cache)?;
|
||||
match control_flow {
|
||||
ControlFlow::Continue(_) => {}
|
||||
ControlFlow::Break(_) => return Ok(true),
|
||||
}
|
||||
true
|
||||
} else {
|
||||
self.visit_paths_of_cost_rec(
|
||||
|
@ -20,7 +20,7 @@ use std::hash::Hash;
|
||||
|
||||
pub use edge_docids_cache::EdgeConditionDocIdsCache;
|
||||
pub use empty_paths_cache::DeadEndPathCache;
|
||||
pub use proximity::{ProximityEdge, ProximityGraph};
|
||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||
use roaring::RoaringBitmap;
|
||||
pub use typo::{TypoEdge, TypoGraph};
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use super::ProximityEdge;
|
||||
use super::ProximityCondition;
|
||||
use crate::search::new::db_cache::DatabaseCache;
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
@ -37,10 +37,10 @@ fn first_word_of_term_iter<'t>(
|
||||
|
||||
pub fn build_edges<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
conditions_interner: &mut DedupInterner<ProximityEdge>,
|
||||
conditions_interner: &mut DedupInterner<ProximityCondition>,
|
||||
from_node: &QueryNode,
|
||||
to_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
|
||||
) -> Result<Vec<(u8, EdgeCondition<ProximityCondition>)>> {
|
||||
let SearchContext {
|
||||
index,
|
||||
txn,
|
||||
@ -51,24 +51,33 @@ pub fn build_edges<'ctx>(
|
||||
term_docids: _,
|
||||
} = ctx;
|
||||
|
||||
let (left_term, left_end_position) = match &from_node.data {
|
||||
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
|
||||
(term_interner.get(*value), *positions.end())
|
||||
}
|
||||
QueryNodeData::Deleted => return Ok(vec![]),
|
||||
QueryNodeData::Start => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNodeData::End => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let right_term = match &to_node.data {
|
||||
QueryNodeData::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]),
|
||||
QueryNodeData::Term(term) => term,
|
||||
};
|
||||
let LocatedQueryTerm { value: right_value, positions: right_positions } = right_term;
|
||||
|
||||
let LocatedQueryTerm { value: right_term_interned, positions: right_positions } = right_term;
|
||||
|
||||
let (right_term, right_start_position, right_ngram_length) =
|
||||
(term_interner.get(*right_value), *right_positions.start(), right_positions.len());
|
||||
(term_interner.get(*right_term_interned), *right_positions.start(), right_positions.len());
|
||||
|
||||
let (left_term, left_end_position) = match &from_node.data {
|
||||
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
|
||||
(term_interner.get(*value), *positions.end())
|
||||
}
|
||||
QueryNodeData::Deleted => return Ok(vec![]),
|
||||
QueryNodeData::Start => {
|
||||
return Ok(vec![(
|
||||
(right_ngram_length - 1) as u8,
|
||||
EdgeCondition::Conditional(
|
||||
conditions_interner
|
||||
.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||
),
|
||||
)])
|
||||
}
|
||||
QueryNodeData::End => return Ok(vec![]),
|
||||
};
|
||||
|
||||
if left_end_position + 1 != right_start_position {
|
||||
// We want to ignore this pair of terms
|
||||
@ -77,7 +86,12 @@ pub fn build_edges<'ctx>(
|
||||
// `flowers` is removed by the `words` ranking rule.
|
||||
// The remaining query graph represents `the sun .. are beautiful`
|
||||
// but `sun` and `are` have no proximity condition between them
|
||||
return Ok(vec![(0, EdgeCondition::Unconditional)]);
|
||||
return Ok(vec![(
|
||||
(right_ngram_length - 1) as u8,
|
||||
EdgeCondition::Conditional(
|
||||
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||
),
|
||||
)]);
|
||||
}
|
||||
|
||||
let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new();
|
||||
@ -121,24 +135,30 @@ pub fn build_edges<'ctx>(
|
||||
}
|
||||
}
|
||||
|
||||
let mut new_edges =
|
||||
cost_proximity_word_pairs
|
||||
.into_iter()
|
||||
.flat_map(|(cost, proximity_word_pairs)| {
|
||||
let mut edges = vec![];
|
||||
for (proximity, word_pairs) in proximity_word_pairs {
|
||||
edges.push((
|
||||
cost,
|
||||
EdgeCondition::Conditional(conditions_interner.insert(ProximityEdge {
|
||||
let mut new_edges = cost_proximity_word_pairs
|
||||
.into_iter()
|
||||
.flat_map(|(cost, proximity_word_pairs)| {
|
||||
let mut edges = vec![];
|
||||
for (proximity, word_pairs) in proximity_word_pairs {
|
||||
edges.push((
|
||||
cost,
|
||||
EdgeCondition::Conditional(conditions_interner.insert(
|
||||
ProximityCondition::Pairs {
|
||||
pairs: word_pairs.into_boxed_slice(),
|
||||
proximity,
|
||||
})),
|
||||
))
|
||||
}
|
||||
edges
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
new_edges.push((8 + (right_ngram_length - 1) as u8, EdgeCondition::Unconditional));
|
||||
},
|
||||
)),
|
||||
))
|
||||
}
|
||||
edges
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
new_edges.push((
|
||||
8 + (right_ngram_length - 1) as u8,
|
||||
EdgeCondition::Conditional(
|
||||
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||
),
|
||||
));
|
||||
Ok(new_edges)
|
||||
}
|
||||
|
||||
|
@ -1,16 +1,39 @@
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ProximityEdge, WordPair};
|
||||
use super::{ProximityCondition, WordPair};
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
|
||||
pub fn compute_docids<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &ProximityEdge,
|
||||
edge: &ProximityCondition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let SearchContext { index, txn, db_cache, word_interner, .. } = ctx;
|
||||
let ProximityEdge { pairs, proximity } = edge;
|
||||
let SearchContext {
|
||||
index,
|
||||
txn,
|
||||
db_cache,
|
||||
word_interner,
|
||||
term_docids,
|
||||
phrase_interner,
|
||||
term_interner,
|
||||
} = ctx;
|
||||
let (pairs, proximity) = match edge {
|
||||
ProximityCondition::Term { term } => {
|
||||
return term_docids
|
||||
.get_query_term_docids(
|
||||
index,
|
||||
txn,
|
||||
db_cache,
|
||||
word_interner,
|
||||
term_interner,
|
||||
phrase_interner,
|
||||
*term,
|
||||
)
|
||||
.cloned()
|
||||
}
|
||||
ProximityCondition::Pairs { pairs, proximity } => (pairs, proximity),
|
||||
};
|
||||
let mut pair_docids = RoaringBitmap::new();
|
||||
for pair in pairs.iter() {
|
||||
let pair = match pair {
|
||||
|
@ -4,15 +4,15 @@ pub mod compute_docids;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::DeadEndPathCache;
|
||||
use super::{EdgeCondition, RankingRuleGraphTrait};
|
||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_term::Phrase;
|
||||
use crate::search::new::query_term::{Phrase, QueryTerm};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum WordPair {
|
||||
Words {
|
||||
phrases: Vec<Interned<Phrase>>,
|
||||
@ -31,27 +31,33 @@ pub enum WordPair {
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct ProximityEdge {
|
||||
pairs: Box<[WordPair]>,
|
||||
proximity: u8,
|
||||
pub enum ProximityCondition {
|
||||
Term { term: Interned<QueryTerm> },
|
||||
Pairs { pairs: Box<[WordPair]>, proximity: u8 },
|
||||
}
|
||||
|
||||
pub enum ProximityGraph {}
|
||||
|
||||
impl RankingRuleGraphTrait for ProximityGraph {
|
||||
type EdgeCondition = ProximityEdge;
|
||||
type EdgeCondition = ProximityCondition;
|
||||
|
||||
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
|
||||
let ProximityEdge { pairs, proximity } = edge;
|
||||
format!(", prox {proximity}, {} pairs", pairs.len())
|
||||
match edge {
|
||||
ProximityCondition::Term { term } => {
|
||||
format!("term {term}")
|
||||
}
|
||||
ProximityCondition::Pairs { pairs, proximity } => {
|
||||
format!("prox {proximity}, {} pairs", pairs.len())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
condition: &Self::EdgeCondition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<roaring::RoaringBitmap> {
|
||||
compute_docids::compute_docids(ctx, edge, universe)
|
||||
compute_docids::compute_docids(ctx, condition, universe)
|
||||
}
|
||||
|
||||
fn build_edges<'ctx>(
|
||||
@ -64,11 +70,11 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
}
|
||||
|
||||
fn log_state(
|
||||
graph: &super::RankingRuleGraph<Self>,
|
||||
graph: &RankingRuleGraph<Self>,
|
||||
paths: &[Vec<u16>],
|
||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
) {
|
||||
|
Loading…
Reference in New Issue
Block a user