diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs
index e66ba781c..a4c29ce66 100644
--- a/milli/src/search/matches/mod.rs
+++ b/milli/src/search/matches/mod.rs
@@ -363,11 +363,15 @@ impl<'t> Matcher<'t, '_> {
formatted.push(&self.text[byte_index..token.byte_start]);
}
+ let highlight_byte_index = self.text[token.byte_start..]
+ .char_indices()
+ .enumerate()
+ .find(|(i, _)| *i == m.match_len)
+ .map_or(token.byte_end, |(_, (i, _))| i + token.byte_start);
formatted.push(self.highlight_prefix);
- formatted.push(&self.text[token.byte_start..][..m.match_len]);
+ formatted.push(&self.text[token.byte_start..highlight_byte_index]);
formatted.push(self.highlight_suffix);
- formatted
- .push(&self.text[token.byte_start + m.match_len..token.byte_end]);
+ formatted.push(&self.text[highlight_byte_index..token.byte_end]);
byte_index = token.byte_end;
}
@@ -398,6 +402,8 @@ impl<'t> Matcher<'t, '_> {
#[cfg(test)]
mod tests {
+ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
+
use super::*;
use crate::search::query_tree::{Query, QueryKind};
@@ -506,17 +512,53 @@ mod tests {
&matcher.format(highlight, crop),
"Natalie risk her future to build a world with the boy she loves."
);
+ }
- // Text containing some matches by prefix.
- let text = "Natalie risk her future to build a worldle with the boy she loves.";
+ #[test]
+ fn highlight_unicode() {
+ let query_tree = Operation::Or(
+ false,
+ vec![Operation::And(vec![
+ Operation::Query(Query {
+ prefix: true,
+ kind: QueryKind::tolerant(1, "wessfalia".to_string()),
+ }),
+ Operation::Query(Query {
+ prefix: true,
+ kind: QueryKind::tolerant(1, "world".to_string()),
+ }),
+ ])],
+ );
+
+ let builder = MatcherBuilder::from_query_tree(&query_tree);
+ let analyzer = Analyzer::new(AnalyzerConfig::>::default());
+
+ let highlight = true;
+ let crop = false;
+
+ // Text containing prefix match.
+ let text = "Ŵôřlḑôle";
let analyzed = analyzer.analyze(&text);
let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text);
// no crop should return complete text with highlighted matches.
- assert_eq!(
- &matcher.format(highlight, crop),
- "Natalie risk her future to build a worldle with the boy she loves."
- );
+ assert_eq!(&matcher.format(highlight, crop), "Ŵôřlḑôle");
+
+ // Text containing unicode match.
+ let text = "Ŵôřlḑ";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+ let mut matcher = builder.build(&tokens[..], text);
+ // no crop should return complete text with highlighted matches.
+ assert_eq!(&matcher.format(highlight, crop), "Ŵôřlḑ");
+
+ // Text containing unicode match.
+ let text = "Westfália";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+ let mut matcher = builder.build(&tokens[..], text);
+ // no crop should return complete text with highlighted matches.
+ assert_eq!(&matcher.format(highlight, crop), "Westfália");
}
#[test]