From d6585eb10bb83be437bec6108e67280bf59bba9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 6 Apr 2023 15:50:11 +0200 Subject: [PATCH] Avoid splitting ngrams into their original component words --- .../new/query_term/compute_derivations.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/milli/src/search/new/query_term/compute_derivations.rs b/milli/src/search/new/query_term/compute_derivations.rs index 695c57f08..03d92572e 100644 --- a/milli/src/search/new/query_term/compute_derivations.rs +++ b/milli/src/search/new/query_term/compute_derivations.rs @@ -293,9 +293,26 @@ impl Interned { })?; let original_str = ctx.word_interner.get(original).to_owned(); let split_words = find_split_words(ctx, original_str.as_str())?; - let one_typo = OneTypoTerm { split_words, one_typo: one_typo_words }; let self_mut = ctx.term_interner.get_mut(self); + + // Only add the split words to the derivations if: + // 1. the term is not an ngram; OR + // 2. the term is an ngram, but the split words are different from the ngram's component words + let split_words = if let Some((ngram_words, split_words)) = + self_mut.ngram_words.as_ref().zip(split_words.as_ref()) + { + let Phrase { words } = ctx.phrase_interner.get(*split_words); + if ngram_words.iter().ne(words.iter().flatten()) { + Some(*split_words) + } else { + None + } + } else { + split_words + }; + let one_typo = OneTypoTerm { split_words, one_typo: one_typo_words }; + self_mut.one_typo = Lazy::Init(one_typo); Ok(())