mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 20:15:07 +08:00
Make compute_matches more rust idiomatic
This commit is contained in:
parent
a16de5de84
commit
011f8210ed
@ -110,63 +110,53 @@ pub struct Matcher<'t, 'm> {
|
|||||||
impl<'t> Matcher<'t, '_> {
|
impl<'t> Matcher<'t, '_> {
|
||||||
/// Iterates over tokens and save any of them that matches the query.
|
/// Iterates over tokens and save any of them that matches the query.
|
||||||
fn compute_matches(&mut self) -> &mut Self {
|
fn compute_matches(&mut self) -> &mut Self {
|
||||||
fn compute_partial_match(
|
fn compute_partial_match<'a>(
|
||||||
mut partial: PartialMatch,
|
mut partial: PartialMatch,
|
||||||
tokens: &[Token],
|
token_position: usize,
|
||||||
token_position: &mut usize,
|
word_position: usize,
|
||||||
word_position: &mut usize,
|
words_positions: &mut impl Iterator<Item = (usize, usize, &'a Token<'a>)>,
|
||||||
matches: &mut Vec<Match>,
|
matches: &mut Vec<Match>,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
let mut potential_matches = vec![(*token_position, *word_position, partial.char_len())];
|
let mut potential_matches = Vec::new();
|
||||||
let mut t_position = 1;
|
|
||||||
let mut w_position = 1;
|
|
||||||
for token in &tokens[*token_position + 1..] {
|
|
||||||
if token.is_separator().is_none() {
|
|
||||||
partial = match partial.match_token(&token) {
|
|
||||||
// token matches the partial match, but the match is not full,
|
|
||||||
// we temporarly save the current token then we try to match the next one.
|
|
||||||
Some(MatchType::Partial(partial)) => {
|
|
||||||
potential_matches.push((
|
|
||||||
*token_position + t_position,
|
|
||||||
*word_position + w_position,
|
|
||||||
partial.char_len(),
|
|
||||||
));
|
|
||||||
partial
|
|
||||||
}
|
|
||||||
// partial match is now full, we keep this matches and we advance positions
|
|
||||||
Some(MatchType::Full { char_len, ids }) => {
|
|
||||||
// save previously matched tokens as matches.
|
|
||||||
let iter = potential_matches.into_iter().map(
|
|
||||||
|(token_position, word_position, match_len)| Match {
|
|
||||||
match_len,
|
|
||||||
ids: ids.to_vec(),
|
|
||||||
word_position,
|
|
||||||
token_position,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
matches.extend(iter);
|
|
||||||
|
|
||||||
// move word and token positions after the end of the match.
|
// Add first match to potential matches.
|
||||||
*word_position += w_position;
|
potential_matches.push((token_position, word_position, partial.char_len()));
|
||||||
*token_position += t_position;
|
|
||||||
|
|
||||||
// save the token that closes the partial match as a match.
|
for (token_position, word_position, word) in words_positions {
|
||||||
matches.push(Match {
|
partial = match partial.match_token(&word) {
|
||||||
match_len: char_len,
|
// token matches the partial match, but the match is not full,
|
||||||
|
// we temporarly save the current token then we try to match the next one.
|
||||||
|
Some(MatchType::Partial(partial)) => {
|
||||||
|
potential_matches.push((token_position, word_position, partial.char_len()));
|
||||||
|
partial
|
||||||
|
}
|
||||||
|
// partial match is now full, we keep this matches and we advance positions
|
||||||
|
Some(MatchType::Full { char_len, ids }) => {
|
||||||
|
// save previously matched tokens as matches.
|
||||||
|
let iter = potential_matches.into_iter().map(
|
||||||
|
|(token_position, word_position, match_len)| Match {
|
||||||
|
match_len,
|
||||||
ids: ids.to_vec(),
|
ids: ids.to_vec(),
|
||||||
word_position: *word_position,
|
word_position,
|
||||||
token_position: *token_position,
|
token_position,
|
||||||
});
|
},
|
||||||
|
);
|
||||||
|
matches.extend(iter);
|
||||||
|
|
||||||
// the match is complete, we return true.
|
// save the token that closes the partial match as a match.
|
||||||
return true;
|
matches.push(Match {
|
||||||
}
|
match_len: char_len,
|
||||||
// no match, continue to next match.
|
ids: ids.to_vec(),
|
||||||
None => break,
|
word_position,
|
||||||
};
|
token_position,
|
||||||
w_position += 1;
|
});
|
||||||
}
|
|
||||||
t_position += 1;
|
// the match is complete, we return true.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// no match, continue to next match.
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// the match is not complete, we return false.
|
// the match is not complete, we return false.
|
||||||
@ -174,42 +164,54 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut matches = Vec::new();
|
let mut matches = Vec::new();
|
||||||
let mut word_position = 0;
|
|
||||||
let mut token_position = 0;
|
let mut words_positions = self
|
||||||
while let Some(token) = self.tokens.get(token_position) {
|
.tokens
|
||||||
if token.is_separator().is_none() {
|
.iter()
|
||||||
for match_type in self.matching_words.match_token(&token) {
|
.scan((0, 0), |(token_position, word_position), token| {
|
||||||
match match_type {
|
let current_token_position = *token_position;
|
||||||
// we match, we save the current token as a match,
|
let current_word_position = *word_position;
|
||||||
|
*token_position += 1;
|
||||||
|
if token.is_separator().is_none() {
|
||||||
|
*word_position += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some((current_token_position, current_word_position, token))
|
||||||
|
})
|
||||||
|
.filter(|(_, _, token)| token.is_separator().is_none());
|
||||||
|
|
||||||
|
while let Some((token_position, word_position, word)) = words_positions.next() {
|
||||||
|
for match_type in self.matching_words.match_token(word) {
|
||||||
|
match match_type {
|
||||||
|
// we match, we save the current token as a match,
|
||||||
|
// then we continue the rest of the tokens.
|
||||||
|
MatchType::Full { char_len, ids } => {
|
||||||
|
matches.push(Match {
|
||||||
|
match_len: char_len,
|
||||||
|
ids: ids.to_vec(),
|
||||||
|
word_position,
|
||||||
|
token_position,
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// we match partially, iterate over next tokens to check if we can complete the match.
|
||||||
|
MatchType::Partial(partial) => {
|
||||||
|
// if match is completed, we break the matching loop over the current token,
|
||||||
// then we continue the rest of the tokens.
|
// then we continue the rest of the tokens.
|
||||||
MatchType::Full { char_len, ids } => {
|
let mut wp = words_positions.clone();
|
||||||
matches.push(Match {
|
if compute_partial_match(
|
||||||
match_len: char_len,
|
partial,
|
||||||
ids: ids.to_vec(),
|
token_position,
|
||||||
word_position,
|
word_position,
|
||||||
token_position,
|
&mut wp,
|
||||||
});
|
&mut matches,
|
||||||
|
) {
|
||||||
|
words_positions = wp;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// we match partially, iterate over next tokens to check if we can complete the match.
|
|
||||||
MatchType::Partial(partial) => {
|
|
||||||
// if match is completed, we break the matching loop over the current token,
|
|
||||||
// then we continue the rest of the tokens.
|
|
||||||
if compute_partial_match(
|
|
||||||
partial,
|
|
||||||
&self.tokens,
|
|
||||||
&mut token_position,
|
|
||||||
&mut word_position,
|
|
||||||
&mut matches,
|
|
||||||
) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
word_position += 1;
|
|
||||||
}
|
}
|
||||||
token_position += 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self.matches = Some(matches);
|
self.matches = Some(matches);
|
||||||
@ -826,4 +828,49 @@ mod tests {
|
|||||||
// because crop size is 0, crop is ignored.
|
// because crop size is 0, crop is ignored.
|
||||||
assert_eq!(&matcher.format(highlight, crop), "void void split the world void void.");
|
assert_eq!(&matcher.format(highlight, crop), "void void split the world void void.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn partial_matches() {
|
||||||
|
let matching_words = vec![
|
||||||
|
(vec![MatchingWord::new("the".to_string(), 0, false)], vec![0]),
|
||||||
|
(
|
||||||
|
vec![
|
||||||
|
MatchingWord::new("t".to_string(), 0, false),
|
||||||
|
MatchingWord::new("he".to_string(), 0, false),
|
||||||
|
],
|
||||||
|
vec![0],
|
||||||
|
),
|
||||||
|
(vec![MatchingWord::new("door".to_string(), 0, false)], vec![1]),
|
||||||
|
(
|
||||||
|
vec![
|
||||||
|
MatchingWord::new("do".to_string(), 0, false),
|
||||||
|
MatchingWord::new("or".to_string(), 0, false),
|
||||||
|
],
|
||||||
|
vec![1],
|
||||||
|
),
|
||||||
|
(vec![MatchingWord::new("do".to_string(), 0, false)], vec![2]),
|
||||||
|
];
|
||||||
|
|
||||||
|
let matching_words = MatchingWords::new(matching_words);
|
||||||
|
|
||||||
|
let mut builder = MatcherBuilder::from_matching_words(matching_words);
|
||||||
|
builder.highlight_prefix("_".to_string());
|
||||||
|
builder.highlight_suffix("_".to_string());
|
||||||
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
|
||||||
|
let highlight = true;
|
||||||
|
let crop = false;
|
||||||
|
|
||||||
|
let text = "the do or die can't be he do and or isn't he";
|
||||||
|
let analyzed = analyzer.analyze(&text);
|
||||||
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
|
|
||||||
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
|
assert_eq!(
|
||||||
|
&matcher.format(highlight, crop),
|
||||||
|
"_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_",
|
||||||
|
"matches: {:?}",
|
||||||
|
&matcher.matches
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user