diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 09845377c..aa1c11773 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -89,7 +89,7 @@ impl<'ctx> SearchContext<'ctx> { } /// Retrieve or insert the given value in the `word_docids` database. - pub fn get_db_word_docids(&mut self, word: Interned) -> Result> { + fn get_db_word_docids(&mut self, word: Interned) -> Result> { DatabaseCache::get_value( self.txn, word, diff --git a/milli/src/search/new/logger/visual.rs b/milli/src/search/new/logger/visual.rs index 17f7ef76c..068b5ad68 100644 --- a/milli/src/search/new/logger/visual.rs +++ b/milli/src/search/new/logger/visual.rs @@ -427,7 +427,7 @@ fill: \"#B6E2D3\" )?; for w in term_subset.all_single_words_except_prefix_db(ctx)? { - let w = ctx.word_interner.get(w); + let w = ctx.word_interner.get(w.interned()); writeln!(file, "{w}: word")?; } for p in term_subset.all_phrases(ctx)? { diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index f51d3771d..9f8d8699f 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -50,6 +50,8 @@ use ranking_rules::{BoxRankingRule, RankingRule}; use resolve_query_graph::compute_query_graph_docids; use sort::Sort; +use self::interner::Interned; + /// A structure used throughout the execution of a search query. pub struct SearchContext<'ctx> { pub index: &'ctx Index, diff --git a/milli/src/search/new/query_term/mod.rs b/milli/src/search/new/query_term/mod.rs index 83320139b..0a0d1a7eb 100644 --- a/milli/src/search/new/query_term/mod.rs +++ b/milli/src/search/new/query_term/mod.rs @@ -3,18 +3,18 @@ mod ntypo_subset; mod parse_query; mod phrase; -use super::interner::{DedupInterner, Interned}; -use super::{limits, SearchContext}; -use crate::Result; use std::collections::BTreeSet; use std::ops::RangeInclusive; +use compute_derivations::partially_initialized_term_from_word; use either::Either; pub use ntypo_subset::NTypoTermSubset; pub use parse_query::{located_query_terms_from_string, make_ngram, number_of_typos_allowed}; pub use phrase::Phrase; -use compute_derivations::partially_initialized_term_from_word; +use super::interner::{DedupInterner, Interned}; +use super::{limits, SearchContext, Word}; +use crate::Result; /// A set of word derivations attached to a location in the search query. #[derive(Clone, PartialEq, Eq, Hash)] @@ -180,7 +180,7 @@ impl QueryTermSubset { pub fn all_single_words_except_prefix_db( &self, ctx: &mut SearchContext, - ) -> Result>> { + ) -> Result> { let mut result = BTreeSet::default(); // TODO: a compute_partially funtion if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() { @@ -197,8 +197,20 @@ impl QueryTermSubset { synonyms: _, use_prefix_db: _, } = &original.zero_typo; - result.extend(zero_typo.iter().copied()); - result.extend(prefix_of.iter().copied()); + result.extend(zero_typo.iter().copied().map(|w| { + if original.ngram_words.is_some() { + Word::Derived(w) + } else { + Word::Original(w) + } + })); + result.extend(prefix_of.iter().copied().map(|w| { + if original.ngram_words.is_some() { + Word::Derived(w) + } else { + Word::Original(w) + } + })); } NTypoTermSubset::Subset { words, phrases: _ } => { let ZeroTypoTerm { @@ -210,10 +222,14 @@ impl QueryTermSubset { } = &original.zero_typo; if let Some(zero_typo) = zero_typo { if words.contains(zero_typo) { - result.insert(*zero_typo); + if original.ngram_words.is_some() { + result.insert(Word::Derived(*zero_typo)); + } else { + result.insert(Word::Original(*zero_typo)); + } } } - result.extend(prefix_of.intersection(words).copied()); + result.extend(prefix_of.intersection(words).copied().map(Word::Derived)); } NTypoTermSubset::Nothing => {} } @@ -223,13 +239,13 @@ impl QueryTermSubset { let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else { panic!() }; - result.extend(one_typo.iter().copied()) + result.extend(one_typo.iter().copied().map(Word::Derived)) } NTypoTermSubset::Subset { words, phrases: _ } => { let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else { panic!() }; - result.extend(one_typo.intersection(words)); + result.extend(one_typo.intersection(words).copied().map(Word::Derived)); } NTypoTermSubset::Nothing => {} }; @@ -239,13 +255,13 @@ impl QueryTermSubset { let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() }; - result.extend(two_typos.iter().copied()); + result.extend(two_typos.iter().copied().map(Word::Derived)); } NTypoTermSubset::Subset { words, phrases: _ } => { let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() }; - result.extend(two_typos.intersection(words)); + result.extend(two_typos.intersection(words).copied().map(Word::Derived)); } NTypoTermSubset::Nothing => {} }; diff --git a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs index 4a3dd6549..7455a7a17 100644 --- a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs @@ -3,7 +3,8 @@ use roaring::RoaringBitmap; use super::{ComputedCondition, RankingRuleGraphTrait}; use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset}; -use crate::{Result, RoaringBitmapCodec, SearchContext}; +use crate::search::new::Word; +use crate::{Result, SearchContext}; #[derive(Clone, PartialEq, Eq, Hash)] pub enum ExactnessCondition { @@ -26,7 +27,7 @@ fn compute_docids( let mut candidates = match exact_term { ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(), ExactTerm::Word(word) => { - if let Some(word_candidates) = ctx.get_db_word_docids(word)? { + if let Some(word_candidates) = ctx.word_docids(Word::Original(word))? { word_candidates } else { return Ok(Default::default()); diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs index b6f164f16..760c7272c 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs @@ -9,7 +9,7 @@ use crate::search::new::interner::Interned; use crate::search::new::query_term::{Phrase, QueryTermSubset}; use crate::search::new::ranking_rule_graph::ComputedCondition; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; -use crate::search::new::SearchContext; +use crate::search::new::{SearchContext, Word}; use crate::Result; pub fn compute_docids( @@ -54,7 +54,7 @@ pub fn compute_docids( { compute_prefix_edges( ctx, - left_word, + left_word.interned(), right_prefix, left_phrase, forward_proximity, @@ -91,7 +91,7 @@ pub fn compute_docids( if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) { continue; } - } else if let Some(left_word_docids) = ctx.get_db_word_docids(left_word)? { + } else if let Some(left_word_docids) = ctx.word_docids(left_word)? { if universe.is_disjoint(&left_word_docids) { continue; } @@ -101,7 +101,7 @@ pub fn compute_docids( for (right_word, right_phrase) in right_derivs { compute_non_prefix_edges( ctx, - left_word, + left_word.interned(), right_word, left_phrase, right_phrase, @@ -243,7 +243,7 @@ fn compute_non_prefix_edges( fn last_words_of_term_derivations( ctx: &mut SearchContext, t: &QueryTermSubset, -) -> Result>, Interned)>> { +) -> Result>, Word)>> { let mut result = BTreeSet::new(); for w in t.all_single_words_except_prefix_db(ctx)? { @@ -253,7 +253,7 @@ fn last_words_of_term_derivations( let phrase = ctx.phrase_interner.get(p); let last_term_of_phrase = phrase.words.last().unwrap(); if let Some(last_word) = last_term_of_phrase { - result.insert((Some(p), *last_word)); + result.insert((Some(p), Word::Original(*last_word))); } } @@ -266,7 +266,7 @@ fn first_word_of_term_iter( let mut result = BTreeSet::new(); let all_words = t.all_single_words_except_prefix_db(ctx)?; for w in all_words { - result.insert((w, None)); + result.insert((w.interned(), None)); } for p in t.all_phrases(ctx)? { let phrase = ctx.phrase_interner.get(p); diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs index bca8b6268..c78f0c5ee 100644 --- a/milli/src/search/new/resolve_query_graph.rs +++ b/milli/src/search/new/resolve_query_graph.rs @@ -9,7 +9,7 @@ use super::interner::Interned; use super::query_graph::QueryNodeData; use super::query_term::{Phrase, QueryTermSubset}; use super::small_bitmap::SmallBitmap; -use super::{QueryGraph, SearchContext}; +use super::{QueryGraph, SearchContext, Word}; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::Result; @@ -35,7 +35,7 @@ pub fn compute_query_term_subset_docids( ) -> Result { let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { - if let Some(word_docids) = ctx.get_db_word_docids(word)? { + if let Some(word_docids) = ctx.word_docids(word)? { docids |= word_docids; } } @@ -125,7 +125,7 @@ pub fn compute_phrase_docids( } if words.len() == 1 { if let Some(word) = &words[0] { - if let Some(word_docids) = ctx.get_db_word_docids(*word)? { + if let Some(word_docids) = ctx.word_docids(Word::Original(*word))? { return Ok(word_docids); } else { return Ok(RoaringBitmap::new());