From 93ba0510942014c8623a9d52296f84f3aea831c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 10 Jul 2024 10:03:00 +0200 Subject: [PATCH] Remove the invalid get_phrases_docids universe parameter --- .../new/ranking_rule_graph/exactness/mod.rs | 2 +- .../proximity/compute_docids.rs | 6 +++--- milli/src/search/new/resolve_query_graph.rs | 21 +++++++------------ 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs index 31f6315d7..e0c2294e5 100644 --- a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs @@ -29,7 +29,7 @@ fn compute_docids( let candidates = match exact_term { // TODO I move the intersection here - ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(None, phrase)? & universe, + ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)? & universe, ExactTerm::Word(word) => { ctx.word_docids(Some(universe), Word::Original(word))?.unwrap_or_default() } diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs index 74bef506a..4ef83b534 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs @@ -74,7 +74,7 @@ pub fn compute_docids( if right_derivs.len() > 1 { let universe = &universe; if let Some(left_phrase) = left_phrase { - if universe.is_disjoint(ctx.get_phrase_docids(None, left_phrase)?) { + if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) { continue; } } else if let Some(left_word_docids) = ctx.word_docids(Some(universe), left_word)? { @@ -126,7 +126,7 @@ fn compute_prefix_edges( // TODO we can clearly give the universe to this method // Unfortunately, it is deserializing/computing stuff and // keeping the result as a materialized bitmap. - let phrase_docids = ctx.get_phrase_docids(None, phrase)?; + let phrase_docids = ctx.get_phrase_docids(phrase)?; if !phrase_docids.is_empty() { used_left_phrases.insert(phrase); } @@ -184,7 +184,7 @@ fn compute_non_prefix_edges( let mut universe = universe.clone(); for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() { - universe &= ctx.get_phrase_docids(None, phrase)?; + universe &= ctx.get_phrase_docids(phrase)?; if universe.is_empty() { return Ok(()); } diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs index d9fdac86e..7a47b0a66 100644 --- a/milli/src/search/new/resolve_query_graph.rs +++ b/milli/src/search/new/resolve_query_graph.rs @@ -19,15 +19,11 @@ pub struct PhraseDocIdsCache { } impl<'ctx> SearchContext<'ctx> { /// Get the document ids associated with the given phrase - pub fn get_phrase_docids( - &mut self, - universe: Option<&RoaringBitmap>, - phrase: Interned, - ) -> Result<&RoaringBitmap> { + pub fn get_phrase_docids(&mut self, phrase: Interned) -> Result<&RoaringBitmap> { if self.phrase_docids.cache.contains_key(&phrase) { return Ok(&self.phrase_docids.cache[&phrase]); }; - let docids = compute_phrase_docids(self, universe, phrase)?; + let docids = compute_phrase_docids(self, phrase)?; // TODO can we improve that? Because there is an issue, we keep that in cache... let _ = self.phrase_docids.cache.insert(phrase, docids); let docids = &self.phrase_docids.cache[&phrase]; @@ -47,7 +43,7 @@ pub fn compute_query_term_subset_docids( } } for phrase in term.all_phrases(ctx)? { - docids |= ctx.get_phrase_docids(None, phrase)?; + docids |= ctx.get_phrase_docids(phrase)?; } if let Some(prefix) = term.use_prefix_db(ctx) { @@ -80,7 +76,7 @@ pub fn compute_query_term_subset_docids_within_field_id( // guaranteed that all of its words are within a single fid. if let Some(word) = phrase.words(ctx).iter().flatten().next() { if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(universe, *word, fid)? { - docids |= ctx.get_phrase_docids(None, phrase)? & word_fid_docids; + docids |= ctx.get_phrase_docids(phrase)? & word_fid_docids; } } } @@ -118,7 +114,7 @@ pub fn compute_query_term_subset_docids_within_position( if let Some(word_position_docids) = ctx.get_db_word_position_docids(universe, *word, position)? { - docids |= ctx.get_phrase_docids(None, phrase)? & word_position_docids; + docids |= ctx.get_phrase_docids(phrase)? & word_position_docids; } } } @@ -190,7 +186,6 @@ pub fn compute_query_graph_docids( pub fn compute_phrase_docids( ctx: &mut SearchContext<'_>, - universe: Option<&RoaringBitmap>, phrase: Interned, ) -> Result { let Phrase { words } = ctx.phrase_interner.get(phrase).clone(); @@ -200,7 +195,7 @@ pub fn compute_phrase_docids( } let mut candidates = RoaringBitmap::new(); for word in words.iter().flatten().copied() { - if let Some(word_docids) = ctx.word_docids(universe, Word::Original(word))? { + if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? { candidates |= word_docids; } else { return Ok(RoaringBitmap::new()); @@ -224,7 +219,7 @@ pub fn compute_phrase_docids( .filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) { if dist == 0 { - match ctx.get_db_word_pair_proximity_docids(universe, s1, s2, 1)? { + match ctx.get_db_word_pair_proximity_docids(None, s1, s2, 1)? { Some(m) => bitmaps.push(m), // If there are no documents for this pair, there will be no // results for the phrase query. @@ -234,7 +229,7 @@ pub fn compute_phrase_docids( let mut bitmap = RoaringBitmap::new(); for dist in 0..=dist { if let Some(m) = - ctx.get_db_word_pair_proximity_docids(universe, s1, s2, dist as u8 + 1)? + ctx.get_db_word_pair_proximity_docids(None, s1, s2, dist as u8 + 1)? { bitmap |= m; }