Make compute_matches more rust idiomatic

This commit is contained in:
ManyTheFish 2022-04-11 16:46:45 +02:00
parent a16de5de84
commit 011f8210ed

View File

@ -110,27 +110,24 @@ pub struct Matcher<'t, 'm> {
impl<'t> Matcher<'t, '_> { impl<'t> Matcher<'t, '_> {
/// Iterates over tokens and saves any of them that match the query. /// Iterates over tokens and saves any of them that match the query.
fn compute_matches(&mut self) -> &mut Self { fn compute_matches(&mut self) -> &mut Self {
fn compute_partial_match( fn compute_partial_match<'a>(
mut partial: PartialMatch, mut partial: PartialMatch,
tokens: &[Token], token_position: usize,
token_position: &mut usize, word_position: usize,
word_position: &mut usize, words_positions: &mut impl Iterator<Item = (usize, usize, &'a Token<'a>)>,
matches: &mut Vec<Match>, matches: &mut Vec<Match>,
) -> bool { ) -> bool {
let mut potential_matches = vec![(*token_position, *word_position, partial.char_len())]; let mut potential_matches = Vec::new();
let mut t_position = 1;
let mut w_position = 1; // Add first match to potential matches.
for token in &tokens[*token_position + 1..] { potential_matches.push((token_position, word_position, partial.char_len()));
if token.is_separator().is_none() {
partial = match partial.match_token(&token) { for (token_position, word_position, word) in words_positions {
partial = match partial.match_token(&word) {
// token matches the partial match, but the match is not full, // token matches the partial match, but the match is not full,
// we temporarily save the current token then we try to match the next one. // we temporarily save the current token then we try to match the next one.
Some(MatchType::Partial(partial)) => { Some(MatchType::Partial(partial)) => {
potential_matches.push(( potential_matches.push((token_position, word_position, partial.char_len()));
*token_position + t_position,
*word_position + w_position,
partial.char_len(),
));
partial partial
} }
// partial match is now full, we keep these matches and we advance positions // partial match is now full, we keep these matches and we advance positions
@ -146,16 +143,12 @@ impl<'t> Matcher<'t, '_> {
); );
matches.extend(iter); matches.extend(iter);
// move word and token positions after the end of the match.
*word_position += w_position;
*token_position += t_position;
// save the token that closes the partial match as a match. // save the token that closes the partial match as a match.
matches.push(Match { matches.push(Match {
match_len: char_len, match_len: char_len,
ids: ids.to_vec(), ids: ids.to_vec(),
word_position: *word_position, word_position,
token_position: *token_position, token_position,
}); });
// the match is complete, we return true. // the match is complete, we return true.
@ -164,9 +157,6 @@ impl<'t> Matcher<'t, '_> {
// no match, continue to next match. // no match, continue to next match.
None => break, None => break,
}; };
w_position += 1;
}
t_position += 1;
} }
// the match is not complete, we return false. // the match is not complete, we return false.
@ -174,11 +164,24 @@ impl<'t> Matcher<'t, '_> {
} }
let mut matches = Vec::new(); let mut matches = Vec::new();
let mut word_position = 0;
let mut token_position = 0; let mut words_positions = self
while let Some(token) = self.tokens.get(token_position) { .tokens
.iter()
.scan((0, 0), |(token_position, word_position), token| {
let current_token_position = *token_position;
let current_word_position = *word_position;
*token_position += 1;
if token.is_separator().is_none() { if token.is_separator().is_none() {
for match_type in self.matching_words.match_token(&token) { *word_position += 1;
}
Some((current_token_position, current_word_position, token))
})
.filter(|(_, _, token)| token.is_separator().is_none());
while let Some((token_position, word_position, word)) = words_positions.next() {
for match_type in self.matching_words.match_token(word) {
match match_type { match match_type {
// we match, we save the current token as a match, // we match, we save the current token as a match,
// then we continue the rest of the tokens. // then we continue the rest of the tokens.
@ -195,21 +198,20 @@ impl<'t> Matcher<'t, '_> {
MatchType::Partial(partial) => { MatchType::Partial(partial) => {
// if match is completed, we break the matching loop over the current token, // if match is completed, we break the matching loop over the current token,
// then we continue the rest of the tokens. // then we continue the rest of the tokens.
let mut wp = words_positions.clone();
if compute_partial_match( if compute_partial_match(
partial, partial,
&self.tokens, token_position,
&mut token_position, word_position,
&mut word_position, &mut wp,
&mut matches, &mut matches,
) { ) {
words_positions = wp;
break; break;
} }
} }
} }
} }
word_position += 1;
}
token_position += 1;
} }
self.matches = Some(matches); self.matches = Some(matches);
@ -826,4 +828,49 @@ mod tests {
// because crop size is 0, crop is ignored. // because crop size is 0, crop is ignored.
assert_eq!(&matcher.format(highlight, crop), "void void split the world void void."); assert_eq!(&matcher.format(highlight, crop), "void void split the world void void.");
} }
/// Formats a text against a query that contains both whole words ("the", "door", "do")
/// and multi-word variants of them ("t" + "he", "do" + "or"), and checks which tokens
/// end up wrapped in the highlight markers.
#[test]
fn partial_matches() {
    // Every matching word in this test is built with the same trailing `0, false` arguments,
    // only the text changes.
    let word = |text: &str| MatchingWord::new(text.to_string(), 0, false);

    // Each entry pairs a sequence of matching words with the ids attached to it.
    let matching_words = MatchingWords::new(vec![
        (vec![word("the")], vec![0]),
        (vec![word("t"), word("he")], vec![0]),
        (vec![word("door")], vec![1]),
        (vec![word("do"), word("or")], vec![1]),
        (vec![word("do")], vec![2]),
    ]);

    // Use "_" on both sides of a match so the expected string stays easy to read.
    let mut builder = MatcherBuilder::from_matching_words(matching_words);
    builder.highlight_prefix("_".to_string());
    builder.highlight_suffix("_".to_string());

    let text = "the do or die can't be he do and or isn't he";
    let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
    let analyzed = analyzer.analyze(&text);
    let tokens: Vec<_> = analyzed.tokens().collect();
    let mut matcher = builder.build(&tokens[..], text);

    // Highlighting enabled, cropping disabled.
    let formatted = matcher.format(true, false);

    // Expected: "do or" and "t he" are highlighted as consecutive pairs, a lone "do" is
    // highlighted on its own, while the isolated "he" and "or" are left untouched.
    assert_eq!(
        &formatted,
        "_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_",
        "matches: {:?}",
        &matcher.matches
    );
}
} }