Fix bug in the exact_attribute ranking rule (rr) implementation

This commit is contained in:
Loïc Lecrenier 2023-04-26 10:40:05 +02:00
parent 3421125a55
commit 55bad07c16
2 changed files with 24 additions and 23 deletions

View File

@ -91,13 +91,14 @@ impl State {
universe: &RoaringBitmap, universe: &RoaringBitmap,
query_graph: &QueryGraph, query_graph: &QueryGraph,
) -> Result<Self> { ) -> Result<Self> {
// An ordered list of the (remaining) query terms, with data extracted from them: struct ExactTermInfo {
// 0. exact subterm. If it doesn't exist, the term is skipped. exact_term: ExactTerm,
// 1. start position of the term start_position: u16,
// 2. id of the term start_term_id: u8,
let mut count_all_positions = 0; position_count: usize,
}
let mut exact_term_position_ids: Vec<(ExactTerm, u16, u8)> = let mut exact_terms: Vec<ExactTermInfo> =
Vec::with_capacity(query_graph.nodes.len() as usize); Vec::with_capacity(query_graph.nodes.len() as usize);
for (_, node) in query_graph.nodes.iter() { for (_, node) in query_graph.nodes.iter() {
match &node.data { match &node.data {
@ -107,34 +108,35 @@ impl State {
} else { } else {
continue; continue;
}; };
count_all_positions += term.positions.len(); exact_terms.push(ExactTermInfo {
exact_term_position_ids.push((
exact_term, exact_term,
*term.positions.start(), start_position: *term.positions.start(),
*term.term_ids.start(), start_term_id: *term.term_ids.start(),
)) position_count: term.positions.len(),
});
} }
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue, QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
} }
} }
exact_term_position_ids.sort_by_key(|(_, _, id)| *id); exact_terms.sort_by_key(|x| x.start_term_id);
exact_term_position_ids.dedup_by_key(|(_, _, id)| *id); exact_terms.dedup_by_key(|x| x.start_term_id);
let count_all_positions = exact_terms.iter().fold(0, |acc, x| acc + x.position_count);
// bail if there is a "hole" (missing word) in remaining query graph // bail if there is a "hole" (missing word) in remaining query graph
if let Some((_, _, first_id)) = exact_term_position_ids.first() { if let Some(e) = exact_terms.first() {
if *first_id != 0 { if e.start_term_id != 0 {
return Ok(State::Empty(query_graph.clone())); return Ok(State::Empty(query_graph.clone()));
} }
} else { } else {
return Ok(State::Empty(query_graph.clone())); return Ok(State::Empty(query_graph.clone()));
} }
let mut previous_id = 0; let mut previous_id = 0;
for (_, _, id) in exact_term_position_ids.iter().copied() { for e in exact_terms.iter() {
if id < previous_id || id - previous_id > 1 { if e.start_term_id < previous_id || e.start_term_id - previous_id > 1 {
return Ok(State::Empty(query_graph.clone())); return Ok(State::Empty(query_graph.clone()));
} else { } else {
previous_id = id; previous_id = e.start_term_id;
} }
} }
@ -147,10 +149,9 @@ impl State {
// first check that for each term, there exists some attribute that has this term at the correct position // first check that for each term, there exists some attribute that has this term at the correct position
//"word-position-docids"; //"word-position-docids";
let mut candidates = universe.clone(); let mut candidates = universe.clone();
let words_positions: Vec<(Vec<_>, _)> = exact_term_position_ids let words_positions: Vec<(Vec<_>, _)> = exact_terms
.iter() .iter()
.copied() .map(|e| (e.exact_term.interned_words(ctx).collect(), e.start_position))
.map(|(term, position, _)| (term.interned_words(ctx).collect(), position))
.collect(); .collect();
for (words, position) in &words_positions { for (words, position) in &words_positions {
if candidates.is_empty() { if candidates.is_empty() {

View File

@ -134,7 +134,7 @@
"typo_rank": 0, "typo_rank": 0,
"proximity_rank": 0, "proximity_rank": 0,
"attribute_rank": 1, "attribute_rank": 1,
"exact_rank": 3, "exact_rank": 1,
"asc_desc_rank": 5, "asc_desc_rank": 5,
"sort_by_rank": 2, "sort_by_rank": 2,
"geo_rank": 34692, "geo_rank": 34692,
@ -369,7 +369,7 @@
"typo_rank": 0, "typo_rank": 0,
"proximity_rank": 0, "proximity_rank": 0,
"attribute_rank": 1, "attribute_rank": 1,
"exact_rank": 2, "exact_rank": 0,
"asc_desc_rank": 2, "asc_desc_rank": 2,
"sort_by_rank": 1, "sort_by_rank": 1,
"geo_rank": 9339230, "geo_rank": 9339230,