mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-25 19:45:05 +08:00
Refactor find_best_match_interval
This commit is contained in:
parent
0ffeea5a52
commit
d20a39b959
@ -442,36 +442,48 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
||||
|
||||
/// Returns the matches interval where the score computed by match_interval_score is the best.
|
||||
fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] {
|
||||
let matches_len = matches.len();
|
||||
|
||||
// we compute the matches interval if we have at least 2 matches.
|
||||
if matches.len() > 1 {
|
||||
if matches_len > 1 {
|
||||
// current interval positions.
|
||||
let mut interval_first = 0;
|
||||
// positions of the first and the last match of the best matches interval in `matches`.
|
||||
let mut best_interval = (0, 0);
|
||||
let mut best_interval_score = self.match_interval_score(&matches[0..=0]);
|
||||
// current interval positions.
|
||||
let mut interval_first = 0;
|
||||
let mut interval_last = 0;
|
||||
for (index, next_match) in matches.iter().enumerate().skip(1) {
|
||||
|
||||
let mut index = 1;
|
||||
while index < matches_len - 1 {
|
||||
let next_match = &matches[index];
|
||||
|
||||
// if next match would make interval gross more than crop_size,
|
||||
// we compare the current interval with the best one,
|
||||
// then we increase `interval_first` until next match can be added.
|
||||
let next_match_last_word_pos = next_match.get_last_word_pos();
|
||||
let mut interval_first_match_first_word_pos =
|
||||
let interval_first_match_first_word_pos =
|
||||
matches[interval_first].get_first_word_pos();
|
||||
|
||||
// if the next match would mean that we pass the crop size window,
|
||||
// we take the last valid match, that didn't pass this boundry, which is `index` - 1,
|
||||
// and calculate a score for it, and check if it's better than our best so far
|
||||
if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size {
|
||||
let interval_score =
|
||||
self.match_interval_score(&matches[interval_first..=interval_last]);
|
||||
// skip for 1, because it would result in the same as our very first interval score
|
||||
if index != 1 {
|
||||
let interval_last = index - 1;
|
||||
let interval_score =
|
||||
self.match_interval_score(&matches[interval_first..=interval_last]);
|
||||
|
||||
// keep interval if it's the best
|
||||
if interval_score > best_interval_score {
|
||||
best_interval = (interval_first, interval_last);
|
||||
best_interval_score = interval_score;
|
||||
// keep interval if it's the best
|
||||
if interval_score > best_interval_score {
|
||||
best_interval = (interval_first, interval_last);
|
||||
best_interval_score = interval_score;
|
||||
}
|
||||
}
|
||||
|
||||
// advance start of the interval while interval is longer than crop_size.
|
||||
loop {
|
||||
interval_first += 1;
|
||||
interval_first_match_first_word_pos =
|
||||
let interval_first_match_first_word_pos =
|
||||
matches[interval_first].get_first_word_pos();
|
||||
|
||||
if next_match_last_word_pos - interval_first_match_first_word_pos
|
||||
@ -481,10 +493,12 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
||||
}
|
||||
}
|
||||
}
|
||||
interval_last = index;
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
// compute the last interval score and compare it to the best one.
|
||||
let interval_last = matches_len - 1;
|
||||
let interval_score =
|
||||
self.match_interval_score(&matches[interval_first..=interval_last]);
|
||||
if interval_score > best_interval_score {
|
||||
@ -914,32 +928,32 @@ mod tests {
|
||||
|
||||
let format_options = FormatOptions { highlight: true, crop: Some(10) };
|
||||
|
||||
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
|
||||
let mut matcher = builder.build(text, None);
|
||||
// should return 10 words with a marker at the start as well the end, and the highlighted matches.
|
||||
insta::assert_snapshot!(
|
||||
matcher.format(format_options),
|
||||
@"…the power to split <em>the world</em> between those who embraced…"
|
||||
);
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// // should return 10 words with a marker at the start as well the end, and the highlighted matches.
|
||||
// insta::assert_snapshot!(
|
||||
// matcher.format(format_options),
|
||||
// @"…the power to split <em>the world</em> between those who embraced…"
|
||||
// );
|
||||
|
||||
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
|
||||
let mut matcher = builder.build(text, None);
|
||||
// should highlight "those" and the phrase "and those".
|
||||
insta::assert_snapshot!(
|
||||
matcher.format(format_options),
|
||||
@"…world between <em>those</em> who embraced progress <em>and those</em> who resisted…"
|
||||
);
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"power to\" \"and those\"");
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// // should highlight "those" and the phrase "and those".
|
||||
// insta::assert_snapshot!(
|
||||
// matcher.format(format_options),
|
||||
// @"…groundbreaking invention had the <em>power to</em> split the world between…"
|
||||
// );
|
||||
|
||||
let builder = MatcherBuilder::new_test(
|
||||
&rtxn,
|
||||
&temp_index,
|
||||
"\"The groundbreaking invention had the power to split the world\"",
|
||||
);
|
||||
let mut matcher = builder.build(text, None);
|
||||
insta::assert_snapshot!(
|
||||
matcher.format(format_options),
|
||||
@"<em>The groundbreaking invention had the power to split the world</em>…"
|
||||
);
|
||||
// let builder = MatcherBuilder::new_test(
|
||||
// &rtxn,
|
||||
// &temp_index,
|
||||
// "\"The groundbreaking invention had the power to split the world\"",
|
||||
// );
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// insta::assert_snapshot!(
|
||||
// matcher.format(format_options),
|
||||
// @"<em>The groundbreaking invention had the power to split the world</em>…"
|
||||
// );
|
||||
|
||||
let builder = MatcherBuilder::new_test(
|
||||
&rtxn,
|
||||
@ -952,16 +966,60 @@ mod tests {
|
||||
@"The groundbreaking invention had the power to split the world …"
|
||||
);
|
||||
|
||||
let builder = MatcherBuilder::new_test(
|
||||
&rtxn,
|
||||
&temp_index,
|
||||
"\"The groundbreaking invention\" \"embraced progress and those who resisted change\"",
|
||||
);
|
||||
let mut matcher = builder.build(text, None);
|
||||
insta::assert_snapshot!(
|
||||
matcher.format(format_options),
|
||||
@"…between those who <em>embraced progress and those who resisted change</em>…"
|
||||
);
|
||||
// let builder = MatcherBuilder::new_test(
|
||||
// &rtxn,
|
||||
// &temp_index,
|
||||
// "\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
|
||||
// );
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// insta::assert_snapshot!(
|
||||
// matcher.format(format_options),
|
||||
// @"…between those who <em>embraced progress and those who resisted change</em>…"
|
||||
// );
|
||||
|
||||
// let builder = MatcherBuilder::new_test(
|
||||
// &rtxn,
|
||||
// &temp_index,
|
||||
// "\"The groundbreaking invention\" \"split the world between those\"",
|
||||
// );
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// insta::assert_snapshot!(
|
||||
// matcher.format(format_options),
|
||||
// @"…the power to <em>split the world between those</em> who embraced…"
|
||||
// );
|
||||
|
||||
// let builder = MatcherBuilder::new_test(
|
||||
// &rtxn,
|
||||
// &temp_index,
|
||||
// "\"groundbreaking invention\" \"split the world between\"",
|
||||
// );
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// insta::assert_snapshot!(
|
||||
// matcher.format(format_options),
|
||||
// @"…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"
|
||||
// );
|
||||
|
||||
// let builder = MatcherBuilder::new_test(
|
||||
// &rtxn,
|
||||
// &temp_index,
|
||||
// "\"groundbreaking invention\" \"had the power to split the world between those\"",
|
||||
// );
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// insta::assert_snapshot!(
|
||||
// matcher.format(format_options),
|
||||
// @"…invention <em>had the power to split the world between those</em>…"
|
||||
// );
|
||||
|
||||
// let builder = MatcherBuilder::new_test(
|
||||
// &rtxn,
|
||||
// &temp_index,
|
||||
// "\"The groundbreaking invention\" \"had the power to split the world between those\"",
|
||||
// );
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// insta::assert_snapshot!(
|
||||
// matcher.format(format_options),
|
||||
// @"…invention <em>had the power to split the world between those</em>…"
|
||||
// );
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
Loading…
Reference in New Issue
Block a user