Improve changes to Matcher

This commit is contained in:
F. Levi 2024-09-13 13:31:07 +03:00
parent e7af499314
commit cc6a2aec06

View File

@ -93,6 +93,16 @@ impl FormatOptions {
} }
} }
/// Selects which end of a match's position range `get_match_pos` returns.
///
/// Only consulted for `MatchPosition::Phrase`, which carries a
/// `(first, last)` pair of positions; a `MatchPosition::Word` match has a
/// single position, so the selector has no effect there.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum FL {
    /// Take the first position of the pair.
    First,
    /// Take the last position of the pair.
    Last,
}
/// Selects which kind of position `get_match_pos` reads from a match.
///
/// A match position stores both a word-based and a token-based position
/// (or a pair of each for phrases); this enum picks between the two.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum WT {
    /// Read the word position(s).
    Word,
    /// Read the token position(s).
    Token,
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum MatchPosition { pub enum MatchPosition {
Word { Word {
@ -256,28 +266,22 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
} }
} }
// @TODO: This should be improved, looks nasty fn get_match_pos(&self, m: &Match, wt: WT, fl: FL) -> usize {
fn get_match_pos(&self, m: &Match, is_first: bool, is_word: bool) -> usize {
match m.position { match m.position {
MatchPosition::Word { word_position, token_position } => { MatchPosition::Word { word_position, token_position } => match wt {
if is_word { WT::Word => word_position,
word_position WT::Token => token_position,
} else { },
token_position MatchPosition::Phrase { word_positions: (fwp, lwp), token_positions: (ftp, ltp) } => {
} match wt {
} WT::Word => match fl {
MatchPosition::Phrase { word_positions: (wpf, wpl), token_positions: (tpf, tpl) } => { FL::First => fwp,
if is_word { FL::Last => lwp,
if is_first { },
return wpf; WT::Token => match fl {
} else { FL::First => ftp,
return wpl; FL::Last => ltp,
} },
}
if is_first {
tpf
} else {
tpl
} }
} }
} }
@ -292,13 +296,13 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
) -> (usize, usize) { ) -> (usize, usize) {
// if there is no match, we start from the beginning of the string by default. // if there is no match, we start from the beginning of the string by default.
let first_match_word_position = let first_match_word_position =
matches.first().map(|m| self.get_match_pos(m, true, true)).unwrap_or(0); matches.first().map(|m| self.get_match_pos(m, WT::Word, FL::First)).unwrap_or(0);
let first_match_token_position = let first_match_token_position =
matches.first().map(|m| self.get_match_pos(m, true, false)).unwrap_or(0); matches.first().map(|m| self.get_match_pos(m, WT::Token, FL::First)).unwrap_or(0);
let last_match_word_position = let last_match_word_position =
matches.last().map(|m| self.get_match_pos(m, false, true)).unwrap_or(0); matches.last().map(|m| self.get_match_pos(m, WT::Word, FL::Last)).unwrap_or(0);
let last_match_token_position = let last_match_token_position =
matches.last().map(|m| self.get_match_pos(m, false, false)).unwrap_or(0); matches.last().map(|m| self.get_match_pos(m, WT::Token, FL::Last)).unwrap_or(0);
// matches needs to be counted in the crop len. // matches needs to be counted in the crop len.
let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; let mut remaining_words = crop_size + first_match_word_position - last_match_word_position;
@ -401,10 +405,12 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
order_score += 1; order_score += 1;
} }
let next_match_first_word_pos = self.get_match_pos(next_match, WT::Word, FL::First);
let current_match_first_word_pos = self.get_match_pos(m, WT::Word, FL::First);
// compute distance between matches // compute distance between matches
distance_score -= (self.get_match_pos(next_match, true, true) distance_score -=
- self.get_match_pos(m, true, true)) (next_match_first_word_pos - current_match_first_word_pos).min(7) as i16;
.min(7) as i16;
} }
ids.extend(m.ids.iter()); ids.extend(m.ids.iter());
@ -432,12 +438,11 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
// if next match would make interval gross more than crop_size, // if next match would make interval gross more than crop_size,
// we compare the current interval with the best one, // we compare the current interval with the best one,
// then we increase `interval_first` until next match can be added. // then we increase `interval_first` until next match can be added.
let next_match_word_position = self.get_match_pos(next_match, true, true); let next_match_word_pos = self.get_match_pos(next_match, WT::Word, FL::First);
let mut interval_first_match_word_pos =
self.get_match_pos(&matches[interval_first], WT::Word, FL::Last);
if next_match_word_position if next_match_word_pos - interval_first_match_word_pos >= crop_size {
- self.get_match_pos(&matches[interval_first], false, true)
>= crop_size
{
let interval_score = let interval_score =
self.match_interval_score(&matches[interval_first..=interval_last]); self.match_interval_score(&matches[interval_first..=interval_last]);
@ -450,11 +455,10 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
// advance start of the interval while interval is longer than crop_size. // advance start of the interval while interval is longer than crop_size.
loop { loop {
interval_first += 1; interval_first += 1;
interval_first_match_word_pos =
self.get_match_pos(&matches[interval_first], WT::Word, FL::Last);
if next_match_word_position if next_match_word_pos - interval_first_match_word_pos < crop_size {
- self.get_match_pos(&matches[interval_first], false, true)
< crop_size
{
break; break;
} }
} }