fix highlighting bug when searching for a phrase with cropping

This commit is contained in:
Vivek Kumar 2023-09-04 12:35:42 +05:30
parent b9ad59c969
commit 11df155598
No known key found for this signature in database
GPG Key ID: 5F915FFA1FD3B841

View File

@ -418,19 +418,11 @@ impl<'t> Matcher<'t, '_> {
} else { } else {
match &self.matches { match &self.matches {
Some((tokens, matches)) => { Some((tokens, matches)) => {
// If the text has to be cropped,
// compute the best interval to crop around.
let matches = match format_options.crop {
Some(crop_size) if crop_size > 0 => {
self.find_best_match_interval(matches, crop_size)
}
_ => matches,
};
// If the text has to be cropped, // If the text has to be cropped,
// crop around the best interval. // crop around the best interval.
let (byte_start, byte_end) = match format_options.crop { let (byte_start, byte_end) = match format_options.crop {
Some(crop_size) if crop_size > 0 => { Some(crop_size) if crop_size > 0 => {
let matches = self.find_best_match_interval(matches, crop_size);
self.crop_bounds(tokens, matches, crop_size) self.crop_bounds(tokens, matches, crop_size)
} }
_ => (0, self.text.len()), _ => (0, self.text.len()),
@ -450,6 +442,11 @@ impl<'t> Matcher<'t, '_> {
for m in matches { for m in matches {
let token = &tokens[m.token_position]; let token = &tokens[m.token_position];
// skip matches out of the crop window.
if token.byte_start < byte_start || token.byte_end > byte_end {
continue;
}
if byte_index < token.byte_start { if byte_index < token.byte_start {
formatted.push(&self.text[byte_index..token.byte_start]); formatted.push(&self.text[byte_index..token.byte_start]);
} }
@ -800,6 +797,24 @@ mod tests {
); );
} }
#[test]
fn format_highlight_crop_phrase_only() {
    //! testing: https://github.com/meilisearch/meilisearch/issues/3975
    // Input sentence in which the words "the" and "world" occur more than once / far apart.
    let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
    // Highlighting enabled, with the crop size set to 10.
    let options = FormatOptions { highlight: true, crop: Some(10) };

    let index = temp_index_with_documents();
    let txn = index.read_txn().unwrap();
    // The query is a quoted phrase, so it is searched as the phrase "the world".
    let builder = MatcherBuilder::new_test(&txn, &index, "\"the world\"");
    let mut matcher = builder.build(text);

    // Expected: a 10-word excerpt with a crop marker at the start as well as at the end,
    // and with the matched words highlighted.
    let formatted = matcher.format(options);
    insta::assert_snapshot!(
        formatted,
        @"…had the power to split <em>the</em> <em>world</em> between those who…"
    );
}
#[test] #[test]
fn smaller_crop_size() { fn smaller_crop_size() {
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295 //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295