3726: Fix prefix highlighting r=loiclec a=ManyTheFish

The prefix queries were not properly highlighted, this PR now highlights only the start of a word when it matched with a prefix

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
meili-bors[bot] 2023-05-08 07:46:46 +00:00 committed by GitHub
commit eace6df91b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 28 deletions

View File

@ -52,7 +52,7 @@ impl MatchingWords {
words.push(LocatedMatchingWords { words.push(LocatedMatchingWords {
value: matching_words, value: matching_words,
positions: located_term.positions.clone(), positions: located_term.positions.clone(),
is_prefix: term.is_cached_prefix(), is_prefix: term.is_prefix(),
original_char_count: term.original_word(&ctx).chars().count(), original_char_count: term.original_word(&ctx).chars().count(),
}); });
} }
@ -244,6 +244,8 @@ pub(crate) mod tests {
temp_index temp_index
.add_documents(documents!([ .add_documents(documents!([
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" }, { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
{ "id": 2, "name": "Westfália" },
{ "id": 3, "name": "Ŵôřlḑôle" },
])) ]))
.unwrap(); .unwrap();
temp_index temp_index
@ -305,7 +307,7 @@ pub(crate) mod tests {
..Default::default() ..Default::default()
}) })
.next(), .next(),
None Some(MatchType::Full { char_len: 5, ids: &(2..=2) })
); );
assert_eq!( assert_eq!(
matching_words matching_words

View File

@ -499,17 +499,36 @@ mod tests {
use charabia::TokenizerBuilder; use charabia::TokenizerBuilder;
use matching_words::tests::temp_index_with_documents; use matching_words::tests::temp_index_with_documents;
use super::super::located_query_terms_from_tokens;
use super::*; use super::*;
use crate::SearchContext; use crate::index::tests::TempIndex;
use crate::{execute_search, SearchContext};
impl<'a> MatcherBuilder<'a, &[u8]> { impl<'a> MatcherBuilder<'a, &[u8]> {
pub fn new_test(mut ctx: SearchContext, query: &'a str) -> Self { fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
let tokenizer = TokenizerBuilder::new().build(); let mut ctx = SearchContext::new(index, rtxn);
let tokens = tokenizer.tokenize(query); let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
let query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap(); &mut ctx,
let matching_words = MatchingWords::new(ctx, query_terms); &Some(query.to_string()),
Self::new(matching_words, TokenizerBuilder::new().build()) crate::TermsMatchingStrategy::default(),
false,
&None,
&None,
crate::search::new::GeoSortStrategy::default(),
0,
100,
Some(10),
&mut crate::DefaultSearchLogger,
&mut crate::DefaultSearchLogger,
)
.unwrap();
// consume context and located_query_terms to build MatchingWords.
let matching_words = match located_query_terms {
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
None => MatchingWords::default(),
};
MatcherBuilder::new(matching_words, TokenizerBuilder::new().build())
} }
} }
@ -517,8 +536,7 @@ mod tests {
fn format_identity() { fn format_identity() {
let temp_index = temp_index_with_documents(); let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap(); let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: false, crop: None }; let format_options = FormatOptions { highlight: false, crop: None };
@ -545,8 +563,7 @@ mod tests {
fn format_highlight() { fn format_highlight() {
let temp_index = temp_index_with_documents(); let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap(); let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: true, crop: None }; let format_options = FormatOptions { highlight: true, crop: None };
@ -589,8 +606,7 @@ mod tests {
fn highlight_unicode() { fn highlight_unicode() {
let temp_index = temp_index_with_documents(); let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap(); let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "world");
let builder = MatcherBuilder::new_test(ctx, "world");
let format_options = FormatOptions { highlight: true, crop: None }; let format_options = FormatOptions { highlight: true, crop: None };
// Text containing prefix match. // Text containing prefix match.
@ -599,7 +615,7 @@ mod tests {
// no crop should return complete text with highlighted matches. // no crop should return complete text with highlighted matches.
insta::assert_snapshot!( insta::assert_snapshot!(
matcher.format(format_options), matcher.format(format_options),
@"<em>Ŵôřlḑôle</em>" @"<em>Ŵôřlḑ</em>ôle"
); );
// Text containing unicode match. // Text containing unicode match.
@ -611,8 +627,7 @@ mod tests {
@"<em>Ŵôřlḑ</em>" @"<em>Ŵôřlḑ</em>"
); );
let ctx = SearchContext::new(&temp_index, &rtxn); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "westfali");
let builder = MatcherBuilder::new_test(ctx, "westfali");
let format_options = FormatOptions { highlight: true, crop: None }; let format_options = FormatOptions { highlight: true, crop: None };
// Text containing unicode match. // Text containing unicode match.
@ -621,7 +636,7 @@ mod tests {
// no crop should return complete text with highlighted matches. // no crop should return complete text with highlighted matches.
insta::assert_snapshot!( insta::assert_snapshot!(
matcher.format(format_options), matcher.format(format_options),
@"<em>Westfália</em>" @"<em>Westfáli</em>a"
); );
} }
@ -629,8 +644,7 @@ mod tests {
fn format_crop() { fn format_crop() {
let temp_index = temp_index_with_documents(); let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap(); let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: false, crop: Some(10) }; let format_options = FormatOptions { highlight: false, crop: Some(10) };
@ -727,8 +741,7 @@ mod tests {
fn format_highlight_crop() { fn format_highlight_crop() {
let temp_index = temp_index_with_documents(); let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap(); let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: true, crop: Some(10) }; let format_options = FormatOptions { highlight: true, crop: Some(10) };
@ -790,8 +803,7 @@ mod tests {
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295 //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
let temp_index = temp_index_with_documents(); let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap(); let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let builder = MatcherBuilder::new_test(ctx, "split the world");
let text = "void void split the world void void."; let text = "void void split the world void void.";
@ -827,8 +839,8 @@ mod tests {
fn partial_matches() { fn partial_matches() {
let temp_index = temp_index_with_documents(); let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap(); let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn); let mut builder =
let mut builder = MatcherBuilder::new_test(ctx, "the \"t he\" door \"do or\""); MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
builder.highlight_prefix("_".to_string()); builder.highlight_prefix("_".to_string());
builder.highlight_suffix("_".to_string()); builder.highlight_suffix("_".to_string());

View File

@ -470,6 +470,9 @@ impl QueryTerm {
pub fn is_cached_prefix(&self) -> bool { pub fn is_cached_prefix(&self) -> bool {
self.zero_typo.use_prefix_db.is_some() self.zero_typo.use_prefix_db.is_some()
} }
pub fn is_prefix(&self) -> bool {
self.is_prefix
}
pub fn original_word(&self, ctx: &SearchContext) -> String { pub fn original_word(&self, ctx: &SearchContext) -> String {
ctx.word_interner.get(self.original).clone() ctx.word_interner.get(self.original).clone()
} }