diff --git a/http-ui/Cargo.toml b/http-ui/Cargo.toml
index 593dba3e5..79c784fdd 100644
--- a/http-ui/Cargo.toml
+++ b/http-ui/Cargo.toml
@@ -17,6 +17,7 @@ once_cell = "1.5.2"
rayon = "1.5.0"
structopt = { version = "0.3.21", default-features = false, features = ["wrap_help"] }
tempfile = "3.2.0"
+unicode-segmentation = "1.6.0"
# http server
askama = "0.10.5"
diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs
index 75a9012c6..386f10cb4 100644
--- a/http-ui/src/main.rs
+++ b/http-ui/src/main.rs
@@ -34,6 +34,7 @@ use structopt::StructOpt;
use tokio::fs::File as TFile;
use tokio::io::AsyncWriteExt;
use tokio::sync::broadcast;
+use unicode_segmentation::UnicodeSegmentation;
use warp::filters::ws::Message;
use warp::http::Response;
use warp::Filter;
@@ -160,13 +161,21 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
let analyzed = self.analyzer.analyze(&old_string);
for (word, token) in analyzed.reconstruct() {
if token.is_word() {
- let to_highlight = matching_words.matching_bytes(token.text()).is_some();
- if to_highlight {
- string.push_str("<mark>")
- }
- string.push_str(word);
- if to_highlight {
- string.push_str("</mark>")
+ let chars_to_highlight = matching_words.matching_bytes(&token).unwrap_or(0);
+ if chars_to_highlight > 0 {
+ let graphemes = word.graphemes(true);
+ let chars = graphemes.clone().into_iter();
+
+ string.push_str("<mark>");
+ string.push_str(
+ chars.take(chars_to_highlight).collect::<String>().as_str(),
+ );
+ string.push_str("</mark>");
+
+ let chars = graphemes.into_iter().skip(chars_to_highlight);
+ string.push_str(chars.collect::<String>().as_str());
+ } else {
+ string.push_str(word);
}
} else {
string.push_str(word);
diff --git a/milli/src/search/matching_words.rs b/milli/src/search/matching_words.rs
index 37754a782..b22335658 100644
--- a/milli/src/search/matching_words.rs
+++ b/milli/src/search/matching_words.rs
@@ -3,6 +3,7 @@ use std::collections::{BTreeMap, HashSet};
use std::ops::{Index, IndexMut};
use levenshtein_automata::{Distance, DFA};
+use meilisearch_tokenizer::Token;
use super::build_dfa;
use crate::search::query_tree::{Operation, Query};
@@ -33,15 +34,18 @@ impl MatchingWords {
}
/// Returns the number of matching bytes if the word matches one of the query words.
- pub fn matching_bytes(&self, word_to_highlight: &str) -> Option<usize> {
+ pub fn matching_bytes(&self, word_to_highlight: &Token) -> Option<usize> {
self.dfas.iter().find_map(|(dfa, query_word, typo, is_prefix)| {
- match dfa.eval(word_to_highlight) {
+ match dfa.eval(word_to_highlight.text()) {
Distance::Exact(t) if t <= *typo => {
if *is_prefix {
- let len = bytes_to_highlight(word_to_highlight, query_word);
- Some(len)
+ let len = bytes_to_highlight(word_to_highlight.text(), query_word);
+ Some(word_to_highlight.num_graphemes_from_bytes(len))
} else {
- Some(word_to_highlight.len())
+ Some(
+ word_to_highlight
+ .num_graphemes_from_bytes(word_to_highlight.text().len()),
+ )
}
}
_otherwise => None,