diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs
index 9266992d0..680dbdffc 100644
--- a/milli/src/search/matches/mod.rs
+++ b/milli/src/search/matches/mod.rs
@@ -158,9 +158,13 @@ impl<'t> Matcher<'t, '_> {
let last_match_word_position = matches.last().map(|m| m.word_position).unwrap_or(0);
let last_match_token_position = matches.last().map(|m| m.token_position).unwrap_or(0);
- // TODO: buggy if no match and fisrt token is a sepparator
+ // TODO: buggy if no match and first token is a sepparator
let mut remaining_words =
- self.crop_size + first_match_word_position - last_match_word_position - 1;
+ self.crop_size + first_match_word_position - last_match_word_position;
+ // if first token is a word, then remove 1 to remaining_words.
+ if let Some(None) = self.tokens.get(first_match_token_position).map(|t| t.is_separator()) {
+ remaining_words -= 1;
+ }
let mut first_token_position = first_match_token_position;
let mut last_token_position = last_match_token_position;
@@ -204,18 +208,21 @@ impl<'t> Matcher<'t, '_> {
}
}
}
+ // the end of the text is reached, advance left.
(Some(ft), None) => {
first_token_position -= 1;
if ft.is_separator().is_none() {
remaining_words -= 1;
}
}
+ // the start of the text is reached, advance right.
(None, Some(lt)) => {
last_token_position += 1;
if lt.is_separator().is_none() {
remaining_words -= 1;
}
}
+ // no more token to add.
(None, None) => break,
}
}
@@ -263,13 +270,14 @@ impl<'t> Matcher<'t, '_> {
fn find_best_match_interval<'a>(&self, matches: &'a [Match]) -> &'a [Match] {
if matches.len() > 1 {
- let mut best_interval = (0, 1);
- let mut best_interval_score = self.match_interval_score(&matches[0..=1]);
+ let mut best_interval = (0, 0);
+ let mut best_interval_score = self.match_interval_score(&matches[0..=0]);
let mut interval_first = 0;
- let mut interval_last = 1;
- for (index, next_match) in matches.iter().enumerate().skip(2) {
+ let mut interval_last = 0;
+ for (index, next_match) in matches.iter().enumerate().skip(1) {
// if next match would make interval gross more than crop_size
- if next_match.word_position - matches[interval_first].word_position > self.crop_size
+ if next_match.word_position - matches[interval_first].word_position
+ >= self.crop_size
{
let interval_score =
self.match_interval_score(&matches[interval_first..=interval_last]);
@@ -282,7 +290,7 @@ impl<'t> Matcher<'t, '_> {
// advance start of the interval while interval is longer than crop_size
while next_match.word_position - matches[interval_first].word_position
- > self.crop_size
+ >= self.crop_size
{
interval_first += 1;
}
@@ -307,10 +315,15 @@ impl<'t> Matcher<'t, '_> {
let (first_token_position, last_token_position) = self.token_crop_bounds(match_interval);
- (self.tokens[first_token_position].byte_start, self.tokens[last_token_position].byte_end)
+ let byte_start = self.tokens.get(first_token_position).map_or(0, |t| t.byte_start);
+ let byte_end = self.tokens.get(last_token_position).map_or(byte_start, |t| t.byte_end);
+ (byte_start, byte_end)
}
pub fn format(&mut self, highlight: bool, crop: bool) -> Cow<'t, str> {
+ // If 0 it will be considered null and thus not crop the field
+ // https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
+ let crop = crop && self.crop_size > 0;
if !highlight && !crop {
// compute matches is not needed if no highlight or crop is requested.
Cow::Borrowed(self.text)
@@ -444,6 +457,20 @@ mod tests {
let highlight = true;
let crop = false;
+ // empty text.
+ let text = "";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+ let mut matcher = builder.build(&tokens[..], text);
+ assert_eq!(&matcher.format(highlight, crop), "");
+
+ // text containing only separators.
+ let text = ":-)";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+ let mut matcher = builder.build(&tokens[..], text);
+ assert_eq!(&matcher.format(highlight, crop), ":-)");
+
// Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
let analyzed = analyzer.analyze(&text);
@@ -482,6 +509,20 @@ mod tests {
let highlight = false;
let crop = true;
+ // empty text.
+ let text = "";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+ let mut matcher = builder.build(&tokens[..], text);
+ assert_eq!(&matcher.format(highlight, crop), "");
+
+ // text containing only separators.
+ let text = ":-)";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+ let mut matcher = builder.build(&tokens[..], text);
+ assert_eq!(&matcher.format(highlight, crop), ":-)");
+
// Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
let analyzed = analyzer.analyze(&text);
@@ -493,6 +534,17 @@ mod tests {
"A quick brown fox can not jump 32 feet, right? …"
);
+ // Text without any match starting by a separator.
+ let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+ let mut matcher = builder.build(&tokens[..], text);
+ // no highlight should return 10 first words with a marker at the end.
+ assert_eq!(
+ &matcher.format(highlight, crop),
+ "(A quick brown fox can not jump 32 feet, right? …"
+ );
+
// Test phrase propagation
let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.";
let analyzed = analyzer.analyze(&text);
@@ -570,6 +622,20 @@ mod tests {
let highlight = true;
let crop = true;
+ // empty text.
+ let text = "";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+ let mut matcher = builder.build(&tokens[..], text);
+ assert_eq!(&matcher.format(highlight, crop), "");
+
+ // text containing only separators.
+ let text = ":-)";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+ let mut matcher = builder.build(&tokens[..], text);
+ assert_eq!(&matcher.format(highlight, crop), ":-)");
+
// Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
let analyzed = analyzer.analyze(&text);
@@ -611,4 +677,38 @@ mod tests {
"…void void void void void split the world void void"
);
}
+
+ #[test]
+ fn smaller_crop_size() {
+ //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
+ let query_tree = query_tree();
+
+ let mut builder = MatcherBuilder::from_query_tree(&query_tree);
+ let analyzer = Analyzer::new(AnalyzerConfig::>::default());
+
+ let highlight = false;
+ let crop = true;
+
+ let text = "void void split the world void void.";
+ let analyzed = analyzer.analyze(&text);
+ let tokens: Vec<_> = analyzed.tokens().collect();
+
+ // set a smaller crop size
+ builder.crop_size(2);
+ let mut matcher = builder.build(&tokens[..], text);
+ // because crop size < query size, partially format matches.
+ assert_eq!(&matcher.format(highlight, crop), "…split the …");
+
+ // set a smaller crop size
+ builder.crop_size(1);
+ let mut matcher = builder.build(&tokens[..], text);
+ // because crop size < query size, partially format matches.
+ assert_eq!(&matcher.format(highlight, crop), "…split …");
+
+ // set a smaller crop size
+ builder.crop_size(0);
+ let mut matcher = builder.build(&tokens[..], text);
+ // because crop size is 0, crop is ignored.
+ assert_eq!(&matcher.format(highlight, crop), "void void split the world void void.");
+ }
}