Add format option structure

This commit is contained in:
ManyTheFish 2022-04-12 13:42:14 +02:00
parent 011f8210ed
commit 827cedcd15
4 changed files with 85 additions and 91 deletions

View File

@ -25,8 +25,8 @@ use milli::update::{
ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
}; };
use milli::{ use milli::{
obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatcherBuilder, obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, FormatOptions, Index,
SearchResult, SortError, MatcherBuilder, SearchResult, SortError,
}; };
use once_cell::sync::OnceCell; use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -162,7 +162,9 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
let analyzed: Vec<_> = analyzed.tokens().collect(); let analyzed: Vec<_> = analyzed.tokens().collect();
let mut matcher = matcher_builder.build(&analyzed[..], &old_string); let mut matcher = matcher_builder.build(&analyzed[..], &old_string);
Value::String(matcher.format(true, true).to_string()) let format_options = FormatOptions { highlight: true, crop: Some(10) };
Value::String(matcher.format(format_options).to_string())
} }
Value::Array(values) => Value::Array( Value::Array(values) => Value::Array(
values.into_iter().map(|v| self.highlight_value(v, matcher_builder)).collect(), values.into_iter().map(|v| self.highlight_value(v, matcher_builder)).collect(),

View File

@ -37,7 +37,8 @@ pub use self::heed_codec::{
}; };
pub use self::index::Index; pub use self::index::Index;
pub use self::search::{ pub use self::search::{
FacetDistribution, Filter, MatchBounds, MatcherBuilder, MatchingWords, Search, SearchResult, FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
SearchResult,
}; };
pub type Result<T> = std::result::Result<T, error::Error>; pub type Result<T> = std::result::Result<T, error::Error>;

View File

@ -8,14 +8,12 @@ use crate::search::matches::matching_words::PartialMatch;
pub mod matching_words; pub mod matching_words;
const DEFAULT_CROP_SIZE: usize = 10;
const DEFAULT_CROP_MARKER: &'static str = ""; const DEFAULT_CROP_MARKER: &'static str = "";
const DEFAULT_HIGHLIGHT_PREFIX: &'static str = "<em>"; const DEFAULT_HIGHLIGHT_PREFIX: &'static str = "<em>";
const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = "</em>"; const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = "</em>";
pub struct MatcherBuilder { pub struct MatcherBuilder {
matching_words: MatchingWords, matching_words: MatchingWords,
crop_size: usize,
crop_marker: Option<String>, crop_marker: Option<String>,
highlight_prefix: Option<String>, highlight_prefix: Option<String>,
highlight_suffix: Option<String>, highlight_suffix: Option<String>,
@ -23,18 +21,7 @@ pub struct MatcherBuilder {
impl MatcherBuilder { impl MatcherBuilder {
pub fn from_matching_words(matching_words: MatchingWords) -> Self { pub fn from_matching_words(matching_words: MatchingWords) -> Self {
Self { Self { matching_words, crop_marker: None, highlight_prefix: None, highlight_suffix: None }
matching_words,
crop_size: DEFAULT_CROP_SIZE,
crop_marker: None,
highlight_prefix: None,
highlight_suffix: None,
}
}
pub fn crop_size(&mut self, word_count: usize) -> &Self {
self.crop_size = word_count;
self
} }
pub fn crop_marker(&mut self, marker: String) -> &Self { pub fn crop_marker(&mut self, marker: String) -> &Self {
@ -70,7 +57,6 @@ impl MatcherBuilder {
text, text,
tokens, tokens,
matching_words: &self.matching_words, matching_words: &self.matching_words,
crop_size: self.crop_size,
crop_marker, crop_marker,
highlight_prefix, highlight_prefix,
highlight_suffix, highlight_suffix,
@ -79,6 +65,18 @@ impl MatcherBuilder {
} }
} }
/// Per-call formatting options handed to `Matcher::format`.
///
/// The type is small and `Copy`, so it is passed by value.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub struct FormatOptions {
    /// When `true`, highlight markers are inserted around the matches.
    pub highlight: bool,
    /// Requested crop size, if any. `None` means no cropping;
    /// `Matcher::format` also treats `Some(0)` as "do not crop".
    pub crop: Option<usize>,
}

impl FormatOptions {
    /// Combines two sets of options into one.
    ///
    /// * `highlight` is enabled if either side enables it.
    /// * `crop` keeps `self.crop` when it is `Some` (including `Some(0)`),
    ///   and falls back to `other.crop` otherwise.
    pub fn merge(self, other: Self) -> Self {
        Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
    }
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Match { pub struct Match {
match_len: usize, match_len: usize,
@ -100,7 +98,6 @@ pub struct Matcher<'t, 'm> {
text: &'t str, text: &'t str,
tokens: &'t [Token<'t>], tokens: &'t [Token<'t>],
matching_words: &'m MatchingWords, matching_words: &'m MatchingWords,
crop_size: usize,
crop_marker: &'m str, crop_marker: &'m str,
highlight_prefix: &'m str, highlight_prefix: &'m str,
highlight_suffix: &'m str, highlight_suffix: &'m str,
@ -233,7 +230,7 @@ impl<'t> Matcher<'t, '_> {
} }
/// Returns the bounds in byte index of the crop window. /// Returns the bounds in byte index of the crop window.
fn crop_bounds(&self, matches: &[Match]) -> (usize, usize) { fn crop_bounds(&self, matches: &[Match], crop_size: usize) -> (usize, usize) {
// if there is no match, we start from the beginning of the string by default. // if there is no match, we start from the beginning of the string by default.
let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0); let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0);
let first_match_token_position = matches.first().map(|m| m.token_position).unwrap_or(0); let first_match_token_position = matches.first().map(|m| m.token_position).unwrap_or(0);
@ -241,8 +238,7 @@ impl<'t> Matcher<'t, '_> {
let last_match_token_position = matches.last().map(|m| m.token_position).unwrap_or(0); let last_match_token_position = matches.last().map(|m| m.token_position).unwrap_or(0);
// matches need to be counted in the crop length. // matches need to be counted in the crop length.
let mut remaining_words = let mut remaining_words = crop_size + first_match_word_position - last_match_word_position;
self.crop_size + first_match_word_position - last_match_word_position;
let mut before_tokens = self.tokens[..first_match_token_position].iter().rev().peekable(); let mut before_tokens = self.tokens[..first_match_token_position].iter().rev().peekable();
let mut after_tokens = self.tokens[last_match_token_position..].iter().peekable(); let mut after_tokens = self.tokens[last_match_token_position..].iter().peekable();
@ -348,7 +344,7 @@ impl<'t> Matcher<'t, '_> {
} }
/// Returns the matches interval where the score computed by match_interval_score is maximal. /// Returns the matches interval where the score computed by match_interval_score is maximal.
fn find_best_match_interval<'a>(&self, matches: &'a [Match]) -> &'a [Match] { fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] {
// we compute the matches interval if we have at least 2 matches. // we compute the matches interval if we have at least 2 matches.
if matches.len() > 1 { if matches.len() > 1 {
// positions of the first and the last match of the best matches interval in `matches`. // positions of the first and the last match of the best matches interval in `matches`.
@ -361,9 +357,7 @@ impl<'t> Matcher<'t, '_> {
// if next match would make interval grow more than crop_size, // if next match would make interval grow more than crop_size,
// we compare the current interval with the best one, // we compare the current interval with the best one,
// then we increase `interval_first` until next match can be added. // then we increase `interval_first` until next match can be added.
if next_match.word_position - matches[interval_first].word_position if next_match.word_position - matches[interval_first].word_position >= crop_size {
>= self.crop_size
{
let interval_score = let interval_score =
self.match_interval_score(&matches[interval_first..=interval_last]); self.match_interval_score(&matches[interval_first..=interval_last]);
@ -375,7 +369,7 @@ impl<'t> Matcher<'t, '_> {
// advance start of the interval while interval is longer than crop_size. // advance start of the interval while interval is longer than crop_size.
while next_match.word_position - matches[interval_first].word_position while next_match.word_position - matches[interval_first].word_position
>= self.crop_size >= crop_size
{ {
interval_first += 1; interval_first += 1;
} }
@ -397,21 +391,24 @@ impl<'t> Matcher<'t, '_> {
} }
// Returns the formatted version of the original text. // Returns the formatted version of the original text.
pub fn format(&mut self, highlight: bool, crop: bool) -> Cow<'t, str> { pub fn format(&mut self, format_options: FormatOptions) -> Cow<'t, str> {
// If 0 it will be considered null and thus not crop the field if !format_options.highlight && format_options.crop.is_none() {
// https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
let crop = crop && self.crop_size > 0;
if !highlight && !crop {
// compute matches is not needed if no highlight nor crop is requested. // compute matches is not needed if no highlight nor crop is requested.
Cow::Borrowed(self.text) Cow::Borrowed(self.text)
} else { } else {
match &self.matches { match &self.matches {
Some(matches) => { Some(matches) => {
let matches = let matches = match format_options.crop {
if crop { self.find_best_match_interval(matches) } else { matches }; Some(crop_size) if crop_size > 0 => {
self.find_best_match_interval(matches, crop_size)
}
_ => matches,
};
let (byte_start, byte_end) = let (byte_start, byte_end) = match format_options.crop {
if crop { self.crop_bounds(matches) } else { (0, self.text.len()) }; Some(crop_size) if crop_size > 0 => self.crop_bounds(matches, crop_size),
_ => (0, self.text.len()),
};
let mut formatted = Vec::new(); let mut formatted = Vec::new();
@ -422,7 +419,7 @@ impl<'t> Matcher<'t, '_> {
let mut byte_index = byte_start; let mut byte_index = byte_start;
if highlight { if format_options.highlight {
// insert highlight markers around matches. // insert highlight markers around matches.
let tokens = self.tokens; let tokens = self.tokens;
for m in matches { for m in matches {
@ -466,7 +463,7 @@ impl<'t> Matcher<'t, '_> {
Cow::Owned(formatted.concat()) Cow::Owned(formatted.concat())
} }
} }
None => self.compute_matches().format(highlight, crop), None => self.compute_matches().format(format_options),
} }
} }
} }
@ -496,8 +493,7 @@ mod tests {
let builder = MatcherBuilder::from_matching_words(matching_words); let builder = MatcherBuilder::from_matching_words(matching_words);
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
let highlight = false; let format_options = FormatOptions { highlight: false, crop: None };
let crop = false;
// Text without any match. // Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
@ -505,7 +501,7 @@ mod tests {
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no crop and no highlight should return complete text. // no crop and no highlight should return complete text.
assert_eq!(&matcher.format(highlight, crop), &text); assert_eq!(&matcher.format(format_options.clone()), &text);
// Text containing all matches. // Text containing all matches.
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
@ -513,7 +509,7 @@ mod tests {
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no crop and no highlight should return complete text. // no crop and no highlight should return complete text.
assert_eq!(&matcher.format(highlight, crop), &text); assert_eq!(&matcher.format(format_options.clone()), &text);
// Text containing some matches. // Text containing some matches.
let text = "Natalie risk her future to build a world with the boy she loves."; let text = "Natalie risk her future to build a world with the boy she loves.";
@ -521,7 +517,7 @@ mod tests {
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no crop and no highlight should return complete text. // no crop and no highlight should return complete text.
assert_eq!(&matcher.format(highlight, crop), &text); assert_eq!(&matcher.format(format_options.clone()), &text);
} }
#[test] #[test]
@ -531,22 +527,21 @@ mod tests {
let builder = MatcherBuilder::from_matching_words(matching_words); let builder = MatcherBuilder::from_matching_words(matching_words);
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
let highlight = true; let format_options = FormatOptions { highlight: true, crop: None };
let crop = false;
// empty text. // empty text.
let text = ""; let text = "";
let analyzed = analyzer.analyze(&text); let analyzed = analyzer.analyze(&text);
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
assert_eq!(&matcher.format(highlight, crop), ""); assert_eq!(&matcher.format(format_options.clone()), "");
// text containing only separators. // text containing only separators.
let text = ":-)"; let text = ":-)";
let analyzed = analyzer.analyze(&text); let analyzed = analyzer.analyze(&text);
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
assert_eq!(&matcher.format(highlight, crop), ":-)"); assert_eq!(&matcher.format(format_options.clone()), ":-)");
// Text without any match. // Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
@ -554,7 +549,7 @@ mod tests {
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no crop should return complete text, because there are no matches. // no crop should return complete text, because there are no matches.
assert_eq!(&matcher.format(highlight, crop), &text); assert_eq!(&matcher.format(format_options.clone()), &text);
// Text containing all matches. // Text containing all matches.
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
@ -562,7 +557,7 @@ mod tests {
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no crop should return complete text with highlighted matches. // no crop should return complete text with highlighted matches.
assert_eq!(&matcher.format(highlight, crop), "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."); assert_eq!(&matcher.format(format_options.clone()), "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>.");
// Text containing some matches. // Text containing some matches.
let text = "Natalie risk her future to build a world with the boy she loves."; let text = "Natalie risk her future to build a world with the boy she loves.";
@ -571,7 +566,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no crop should return complete text with highlighted matches. // no crop should return complete text with highlighted matches.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves." "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
); );
} }
@ -588,8 +583,7 @@ mod tests {
let builder = MatcherBuilder::from_matching_words(matching_words); let builder = MatcherBuilder::from_matching_words(matching_words);
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
let highlight = true; let format_options = FormatOptions { highlight: true, crop: None };
let crop = false;
// Text containing prefix match. // Text containing prefix match.
let text = "Ŵôřlḑôle"; let text = "Ŵôřlḑôle";
@ -597,7 +591,7 @@ mod tests {
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no crop should return complete text with highlighted matches. // no crop should return complete text with highlighted matches.
assert_eq!(&matcher.format(highlight, crop), "<em>Ŵôřlḑ</em>ôle"); assert_eq!(&matcher.format(format_options.clone()), "<em>Ŵôřlḑ</em>ôle");
// Text containing unicode match. // Text containing unicode match.
let text = "Ŵôřlḑ"; let text = "Ŵôřlḑ";
@ -605,7 +599,7 @@ mod tests {
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no crop should return complete text with highlighted matches. // no crop should return complete text with highlighted matches.
assert_eq!(&matcher.format(highlight, crop), "<em>Ŵôřlḑ</em>"); assert_eq!(&matcher.format(format_options.clone()), "<em>Ŵôřlḑ</em>");
// Text containing unicode match. // Text containing unicode match.
let text = "Westfália"; let text = "Westfália";
@ -613,7 +607,7 @@ mod tests {
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no crop should return complete text with highlighted matches. // no crop should return complete text with highlighted matches.
assert_eq!(&matcher.format(highlight, crop), "<em>Westfáli</em>a"); assert_eq!(&matcher.format(format_options.clone()), "<em>Westfáli</em>a");
} }
#[test] #[test]
@ -623,22 +617,21 @@ mod tests {
let builder = MatcherBuilder::from_matching_words(matching_words); let builder = MatcherBuilder::from_matching_words(matching_words);
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
let highlight = false; let format_options = FormatOptions { highlight: false, crop: Some(10) };
let crop = true;
// empty text. // empty text.
let text = ""; let text = "";
let analyzed = analyzer.analyze(&text); let analyzed = analyzer.analyze(&text);
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
assert_eq!(&matcher.format(highlight, crop), ""); assert_eq!(&matcher.format(format_options.clone()), "");
// text containing only separators. // text containing only separators.
let text = ":-)"; let text = ":-)";
let analyzed = analyzer.analyze(&text); let analyzed = analyzer.analyze(&text);
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
assert_eq!(&matcher.format(highlight, crop), ":-)"); assert_eq!(&matcher.format(format_options.clone()), ":-)");
// Text without any match. // Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
@ -647,7 +640,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no highlight should return 10 first words with a marker at the end. // no highlight should return 10 first words with a marker at the end.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"A quick brown fox can not jump 32 feet, right…" "A quick brown fox can not jump 32 feet, right…"
); );
@ -658,7 +651,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no highlight should return 10 first words with a marker at the end. // no highlight should return 10 first words with a marker at the end.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"(A quick brown fox can not jump 32 feet, right…" "(A quick brown fox can not jump 32 feet, right…"
); );
@ -669,7 +662,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// should crop the phrase instead of cropping around the match. // should crop the phrase instead of cropping around the match.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"…Split The World is a book written by Emily Henry…" "…Split The World is a book written by Emily Henry…"
); );
@ -680,7 +673,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no highlight should return 10 last words with a marker at the start. // no highlight should return 10 last words with a marker at the start.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"…future to build a world with the boy she loves…" "…future to build a world with the boy she loves…"
); );
@ -691,7 +684,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// no highlight should return 10 last words with a marker at the start. // no highlight should return 10 last words with a marker at the start.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"…she loves. Emily Henry: The Love That Split The World." "…she loves. Emily Henry: The Love That Split The World."
); );
@ -702,7 +695,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// crop should return 10 last words with a marker at the start. // crop should return 10 last words with a marker at the start.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"…void void void void void split the world void void" "…void void void void void split the world void void"
); );
@ -713,7 +706,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// crop should return 10 last words with a marker at the start. // crop should return 10 last words with a marker at the start.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"…void void void void void split the world void void" "…void void void void void split the world void void"
); );
@ -724,7 +717,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// crop should return 10 last words with a marker at the start. // crop should return 10 last words with a marker at the start.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"…void void void void void split the world void void" "…void void void void void split the world void void"
); );
} }
@ -736,22 +729,21 @@ mod tests {
let builder = MatcherBuilder::from_matching_words(matching_words); let builder = MatcherBuilder::from_matching_words(matching_words);
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
let highlight = true; let format_options = FormatOptions { highlight: true, crop: Some(10) };
let crop = true;
// empty text. // empty text.
let text = ""; let text = "";
let analyzed = analyzer.analyze(&text); let analyzed = analyzer.analyze(&text);
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
assert_eq!(&matcher.format(highlight, crop), ""); assert_eq!(&matcher.format(format_options.clone()), "");
// text containing only separators. // text containing only separators.
let text = ":-)"; let text = ":-)";
let analyzed = analyzer.analyze(&text); let analyzed = analyzer.analyze(&text);
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
assert_eq!(&matcher.format(highlight, crop), ":-)"); assert_eq!(&matcher.format(format_options.clone()), ":-)");
// Text without any match. // Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
@ -760,7 +752,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// both should return 10 first words with a marker at the end. // both should return 10 first words with a marker at the end.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"A quick brown fox can not jump 32 feet, right…" "A quick brown fox can not jump 32 feet, right…"
); );
@ -771,7 +763,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// both should return 10 last words with a marker at the start and highlighted matches. // both should return 10 last words with a marker at the start and highlighted matches.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"…future to build a <em>world</em> with <em>the</em> boy she loves…" "…future to build a <em>world</em> with <em>the</em> boy she loves…"
); );
@ -781,7 +773,7 @@ mod tests {
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// both should return 10 last words with a marker at the start and highlighted matches. // both should return 10 last words with a marker at the start and highlighted matches.
assert_eq!(&matcher.format(highlight, crop), "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."); assert_eq!(&matcher.format(format_options.clone()), "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>.");
// Text containing a match unordered and a match ordered. // Text containing a match unordered and a match ordered.
let text = "The world split void void void void void void void void void split the world void void"; let text = "The world split void void void void void void void void void split the world void void";
@ -790,7 +782,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// crop should return 10 last words with a marker at the start. // crop should return 10 last words with a marker at the start.
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options.clone()),
"…void void void void void <em>split</em> <em>the</em> <em>world</em> void void" "…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"
); );
} }
@ -800,33 +792,33 @@ mod tests {
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295 //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
let matching_words = matching_words(); let matching_words = matching_words();
let mut builder = MatcherBuilder::from_matching_words(matching_words); let builder = MatcherBuilder::from_matching_words(matching_words);
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
let highlight = false;
let crop = true;
let text = "void void split the world void void."; let text = "void void split the world void void.";
let analyzed = analyzer.analyze(&text); let analyzed = analyzer.analyze(&text);
let tokens: Vec<_> = analyzed.tokens().collect(); let tokens: Vec<_> = analyzed.tokens().collect();
// set a smaller crop size // set a smaller crop size
builder.crop_size(2); let format_options = FormatOptions { highlight: false, crop: Some(2) };
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// because crop size < query size, partially format matches. // because crop size < query size, partially format matches.
assert_eq!(&matcher.format(highlight, crop), "…split the…"); assert_eq!(&matcher.format(format_options), "…split the…");
// set a smaller crop size // set a smaller crop size
builder.crop_size(1); let format_options = FormatOptions { highlight: false, crop: Some(1) };
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// because crop size < query size, partially format matches. // because crop size < query size, partially format matches.
assert_eq!(&matcher.format(highlight, crop), "…split…"); assert_eq!(&matcher.format(format_options), "…split…");
// set crop size to 0
let format_options = FormatOptions { highlight: false, crop: Some(0) };
// set a smaller crop size
builder.crop_size(0);
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
// because crop size is 0, crop is ignored. // because crop size is 0, crop is ignored.
assert_eq!(&matcher.format(highlight, crop), "void void split the world void void."); assert_eq!(&matcher.format(format_options), "void void split the world void void.");
} }
#[test] #[test]
@ -858,8 +850,7 @@ mod tests {
builder.highlight_suffix("_".to_string()); builder.highlight_suffix("_".to_string());
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
let highlight = true; let format_options = FormatOptions { highlight: true, crop: None };
let crop = false;
let text = "the do or die can't be he do and or isn't he"; let text = "the do or die can't be he do and or isn't he";
let analyzed = analyzer.analyze(&text); let analyzed = analyzer.analyze(&text);
@ -867,7 +858,7 @@ mod tests {
let mut matcher = builder.build(&tokens[..], text); let mut matcher = builder.build(&tokens[..], text);
assert_eq!( assert_eq!(
&matcher.format(highlight, crop), &matcher.format(format_options),
"_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_", "_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_",
"matches: {:?}", "matches: {:?}",
&matcher.matches &matcher.matches

View File

@ -17,7 +17,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter}; pub use self::facet::{FacetDistribution, FacetNumberIter, Filter};
use self::fst_utils::{Complement, Intersection, StartsWith, Union}; use self::fst_utils::{Complement, Intersection, StartsWith, Union};
pub use self::matches::{MatchBounds, Matcher, MatcherBuilder, MatchingWords}; pub use self::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
use self::query_tree::QueryTreeBuilder; use self::query_tree::QueryTreeBuilder;
use crate::error::UserError; use crate::error::UserError;
use crate::search::criteria::r#final::{Final, FinalResult}; use crate::search::criteria::r#final::{Final, FinalResult};