mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
Hotfix panic for unicode characters
When the highlight bound falls in the middle of a character or is out of bounds, we highlight the complete matching word. Note: we should enhance the tokenizer and the Highlighter to match char indices. Fixes #1368
This commit is contained in:
parent
c11c909bad
commit
acfe31151e
@ -580,13 +580,23 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
|
|||||||
// Matcher::match since the call is expensive.
|
// Matcher::match since the call is expensive.
|
||||||
if format_options.highlight && token.is_word() {
|
if format_options.highlight && token.is_word() {
|
||||||
if let Some(length) = matcher.matches(token.text()) {
|
if let Some(length) = matcher.matches(token.text()) {
|
||||||
if format_options.highlight {
|
match word.get(..length).zip(word.get(length..)) {
|
||||||
out.push_str(&self.marks.0);
|
Some((head, tail)) => {
|
||||||
out.push_str(&word[..length]);
|
out.push_str(&self.marks.0);
|
||||||
out.push_str(&self.marks.1);
|
out.push_str(head);
|
||||||
out.push_str(&word[length..]);
|
out.push_str(&self.marks.1);
|
||||||
return out;
|
out.push_str(tail);
|
||||||
|
}
|
||||||
|
// if we are in the middle of a character
|
||||||
|
// or if all the word should be highlighted,
|
||||||
|
// we highlight the complete word.
|
||||||
|
None => {
|
||||||
|
out.push_str(&self.marks.0);
|
||||||
|
out.push_str(&word);
|
||||||
|
out.push_str(&self.marks.1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out.push_str(word);
|
out.push_str(word);
|
||||||
@ -741,6 +751,71 @@ mod test {
|
|||||||
assert_eq!(value["author"], "J. R. R. Tolkien");
|
assert_eq!(value["author"], "J. R. R. Tolkien");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// https://github.com/meilisearch/MeiliSearch/issues/1368
|
||||||
|
#[test]
|
||||||
|
fn formatted_with_highlight_in_unicode_word() {
|
||||||
|
let stop_words = fst::Set::default();
|
||||||
|
let mut config = AnalyzerConfig::default();
|
||||||
|
config.stop_words(&stop_words);
|
||||||
|
let analyzer = Analyzer::new(config);
|
||||||
|
let formatter = Formatter::new(&analyzer, (String::from("<em>"), String::from("</em>")));
|
||||||
|
|
||||||
|
let mut fields = FieldsIdsMap::new();
|
||||||
|
let title = fields.insert("title").unwrap();
|
||||||
|
let author = fields.insert("author").unwrap();
|
||||||
|
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
let mut obkv = obkv::KvWriter::new(&mut buf);
|
||||||
|
obkv.insert(
|
||||||
|
title,
|
||||||
|
Value::String("Go💼od luck.".into()).to_string().as_bytes(),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
obkv.finish().unwrap();
|
||||||
|
obkv = obkv::KvWriter::new(&mut buf);
|
||||||
|
obkv.insert(
|
||||||
|
author,
|
||||||
|
Value::String("JacobLey".into()).to_string().as_bytes(),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
obkv.finish().unwrap();
|
||||||
|
|
||||||
|
let obkv = obkv::KvReader::new(&buf);
|
||||||
|
|
||||||
|
let mut formatted_options = BTreeMap::new();
|
||||||
|
formatted_options.insert(
|
||||||
|
title,
|
||||||
|
FormatOptions {
|
||||||
|
highlight: true,
|
||||||
|
crop: None,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
formatted_options.insert(
|
||||||
|
author,
|
||||||
|
FormatOptions {
|
||||||
|
highlight: false,
|
||||||
|
crop: None,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut matching_words = BTreeMap::new();
|
||||||
|
// emojis are deunicoded during tokenization
|
||||||
|
// TODO Tokenizer should remove spaces after deunicode
|
||||||
|
matching_words.insert("gobriefcase od", Some(11));
|
||||||
|
|
||||||
|
let value = format_fields(
|
||||||
|
&fields,
|
||||||
|
obkv,
|
||||||
|
&formatter,
|
||||||
|
&matching_words,
|
||||||
|
&formatted_options,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(value["title"], "<em>Go💼od</em> luck.");
|
||||||
|
assert_eq!(value["author"], "JacobLey");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn formatted_with_crop_2() {
|
fn formatted_with_crop_2() {
|
||||||
let stop_words = fst::Set::default();
|
let stop_words = fst::Set::default();
|
||||||
|
Loading…
Reference in New Issue
Block a user