mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Fix prefix highlight with special chars
This commit is contained in:
parent
b3f0f39106
commit
a93cd8c61c
@ -363,11 +363,15 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
formatted.push(&self.text[byte_index..token.byte_start]);
|
formatted.push(&self.text[byte_index..token.byte_start]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let highlight_byte_index = self.text[token.byte_start..]
|
||||||
|
.char_indices()
|
||||||
|
.enumerate()
|
||||||
|
.find(|(i, _)| *i == m.match_len)
|
||||||
|
.map_or(token.byte_end, |(_, (i, _))| i + token.byte_start);
|
||||||
formatted.push(self.highlight_prefix);
|
formatted.push(self.highlight_prefix);
|
||||||
formatted.push(&self.text[token.byte_start..][..m.match_len]);
|
formatted.push(&self.text[token.byte_start..highlight_byte_index]);
|
||||||
formatted.push(self.highlight_suffix);
|
formatted.push(self.highlight_suffix);
|
||||||
formatted
|
formatted.push(&self.text[highlight_byte_index..token.byte_end]);
|
||||||
.push(&self.text[token.byte_start + m.match_len..token.byte_end]);
|
|
||||||
|
|
||||||
byte_index = token.byte_end;
|
byte_index = token.byte_end;
|
||||||
}
|
}
|
||||||
@ -398,6 +402,8 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::search::query_tree::{Query, QueryKind};
|
use crate::search::query_tree::{Query, QueryKind};
|
||||||
|
|
||||||
@ -506,17 +512,53 @@ mod tests {
|
|||||||
&matcher.format(highlight, crop),
|
&matcher.format(highlight, crop),
|
||||||
"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
|
"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Text containing some matches by prefix.
|
#[test]
|
||||||
let text = "Natalie risk her future to build a worldle with the boy she loves.";
|
fn highlight_unicode() {
|
||||||
|
let query_tree = Operation::Or(
|
||||||
|
false,
|
||||||
|
vec![Operation::And(vec![
|
||||||
|
Operation::Query(Query {
|
||||||
|
prefix: true,
|
||||||
|
kind: QueryKind::tolerant(1, "wessfalia".to_string()),
|
||||||
|
}),
|
||||||
|
Operation::Query(Query {
|
||||||
|
prefix: true,
|
||||||
|
kind: QueryKind::tolerant(1, "world".to_string()),
|
||||||
|
}),
|
||||||
|
])],
|
||||||
|
);
|
||||||
|
|
||||||
|
let builder = MatcherBuilder::from_query_tree(&query_tree);
|
||||||
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
|
||||||
|
let highlight = true;
|
||||||
|
let crop = false;
|
||||||
|
|
||||||
|
// Text containing prefix match.
|
||||||
|
let text = "Ŵôřlḑôle";
|
||||||
let analyzed = analyzer.analyze(&text);
|
let analyzed = analyzer.analyze(&text);
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
assert_eq!(
|
assert_eq!(&matcher.format(highlight, crop), "<em>Ŵôřlḑ</em>ôle");
|
||||||
&matcher.format(highlight, crop),
|
|
||||||
"Natalie risk her future to build a <em>world</em>le with <em>the</em> boy she loves."
|
// Text containing unicode match.
|
||||||
);
|
let text = "Ŵôřlḑ";
|
||||||
|
let analyzed = analyzer.analyze(&text);
|
||||||
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
|
// no crop should return complete text with highlighted matches.
|
||||||
|
assert_eq!(&matcher.format(highlight, crop), "<em>Ŵôřlḑ</em>");
|
||||||
|
|
||||||
|
// Text containing unicode match.
|
||||||
|
let text = "Westfália";
|
||||||
|
let analyzed = analyzer.analyze(&text);
|
||||||
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
|
// no crop should return complete text with highlighted matches.
|
||||||
|
assert_eq!(&matcher.format(highlight, crop), "<em>Westfália</em>");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
Loading…
Reference in New Issue
Block a user