From 11df155598412240abdc851d37ad9bcd9b82b691 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Mon, 4 Sep 2023 12:35:42 +0530 Subject: [PATCH 1/3] fix highlighting bug when searching for a phrase with cropping --- milli/src/search/new/matches/mod.rs | 33 +++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 72e155b3e..be4258e6a 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -418,19 +418,11 @@ impl<'t> Matcher<'t, '_> { } else { match &self.matches { Some((tokens, matches)) => { - // If the text has to be cropped, - // compute the best interval to crop around. - let matches = match format_options.crop { - Some(crop_size) if crop_size > 0 => { - self.find_best_match_interval(matches, crop_size) - } - _ => matches, - }; - // If the text has to be cropped, // crop around the best interval. let (byte_start, byte_end) = match format_options.crop { Some(crop_size) if crop_size > 0 => { + let matches = self.find_best_match_interval(matches, crop_size); self.crop_bounds(tokens, matches, crop_size) } _ => (0, self.text.len()), @@ -450,6 +442,11 @@ impl<'t> Matcher<'t, '_> { for m in matches { let token = &tokens[m.token_position]; + // skip matches out of the crop window. + if token.byte_start < byte_start || token.byte_end > byte_end { + continue; + } + if byte_index < token.byte_start { formatted.push(&self.text[byte_index..token.byte_start]); } @@ -800,6 +797,24 @@ mod tests { ); } + #[test] + fn format_highlight_crop_phrase_only() { + //! testing: https://github.com/meilisearch/meilisearch/issues/3975 + let temp_index = temp_index_with_documents(); + let rtxn = temp_index.read_txn().unwrap(); + let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); + + let format_options = FormatOptions { highlight: true, crop: Some(10) }; + + let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!"; + let mut matcher = builder.build(text); + // should return 10 words with a marker at the start as well the end, and the highlighted matches. + insta::assert_snapshot!( + matcher.format(format_options), + @"…had the power to split the world between those who…" + ); + } + #[test] fn smaller_crop_size() { //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295 From f2837aaec26afe9354ae0187d8992d51cf62a1bb Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Tue, 5 Sep 2023 22:22:27 +0530 Subject: [PATCH 2/3] add another test case --- milli/src/search/new/matches/mod.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index be4258e6a..ac6e49008 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -798,21 +798,29 @@ mod tests { } #[test] - fn format_highlight_crop_phrase_only() { + fn format_highlight_crop_phrase_query() { //! testing: https://github.com/meilisearch/meilisearch/issues/3975 let temp_index = temp_index_with_documents(); let rtxn = temp_index.read_txn().unwrap(); - let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); let format_options = FormatOptions { highlight: true, crop: Some(10) }; - let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!"; + + let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); let mut matcher = builder.build(text); // should return 10 words with a marker at the start as well the end, and the highlighted matches. insta::assert_snapshot!( matcher.format(format_options), @"…had the power to split the world between those who…" ); + + let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\""); + let mut matcher = builder.build(text); + // should highlight both "and" and "those". + insta::assert_snapshot!( + matcher.format(format_options), + @"…between those who embraced progress and those who resisted change…" + ); } #[test] From abfa7ded25ba3a94a772c552e9b11de034de4eaa Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Wed, 6 Sep 2023 17:31:18 +0530 Subject: [PATCH 3/3] use a new temp index in the test --- milli/src/search/new/matches/mod.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index ac6e49008..5d61de0f4 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -800,7 +800,12 @@ mod tests { #[test] fn format_highlight_crop_phrase_query() { //! testing: https://github.com/meilisearch/meilisearch/issues/3975 - let temp_index = temp_index_with_documents(); + let temp_index = TempIndex::new(); + temp_index + .add_documents(documents!([ + { "id": 1, "text": "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!" } + ])) + .unwrap(); let rtxn = temp_index.read_txn().unwrap(); let format_options = FormatOptions { highlight: true, crop: Some(10) }; @@ -816,10 +821,10 @@ mod tests { let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\""); let mut matcher = builder.build(text); - // should highlight both "and" and "those". + // should highlight "those" and the phrase "and those". insta::assert_snapshot!( matcher.format(format_options), - @"…between those who embraced progress and those who resisted change…" + @"…world between those who embraced progress and those who resisted…" ); }