mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Add format option structure
This commit is contained in:
parent
011f8210ed
commit
827cedcd15
@ -25,8 +25,8 @@ use milli::update::{
|
|||||||
ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
|
ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
|
||||||
};
|
};
|
||||||
use milli::{
|
use milli::{
|
||||||
obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatcherBuilder,
|
obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, FormatOptions, Index,
|
||||||
SearchResult, SortError,
|
MatcherBuilder, SearchResult, SortError,
|
||||||
};
|
};
|
||||||
use once_cell::sync::OnceCell;
|
use once_cell::sync::OnceCell;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@ -162,7 +162,9 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
|
|||||||
let analyzed: Vec<_> = analyzed.tokens().collect();
|
let analyzed: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = matcher_builder.build(&analyzed[..], &old_string);
|
let mut matcher = matcher_builder.build(&analyzed[..], &old_string);
|
||||||
|
|
||||||
Value::String(matcher.format(true, true).to_string())
|
let format_options = FormatOptions { highlight: true, crop: Some(10) };
|
||||||
|
|
||||||
|
Value::String(matcher.format(format_options).to_string())
|
||||||
}
|
}
|
||||||
Value::Array(values) => Value::Array(
|
Value::Array(values) => Value::Array(
|
||||||
values.into_iter().map(|v| self.highlight_value(v, matcher_builder)).collect(),
|
values.into_iter().map(|v| self.highlight_value(v, matcher_builder)).collect(),
|
||||||
|
@ -37,7 +37,8 @@ pub use self::heed_codec::{
|
|||||||
};
|
};
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{
|
pub use self::search::{
|
||||||
FacetDistribution, Filter, MatchBounds, MatcherBuilder, MatchingWords, Search, SearchResult,
|
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
|
||||||
|
SearchResult,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||||
|
@ -8,14 +8,12 @@ use crate::search::matches::matching_words::PartialMatch;
|
|||||||
|
|
||||||
pub mod matching_words;
|
pub mod matching_words;
|
||||||
|
|
||||||
const DEFAULT_CROP_SIZE: usize = 10;
|
|
||||||
const DEFAULT_CROP_MARKER: &'static str = "…";
|
const DEFAULT_CROP_MARKER: &'static str = "…";
|
||||||
const DEFAULT_HIGHLIGHT_PREFIX: &'static str = "<em>";
|
const DEFAULT_HIGHLIGHT_PREFIX: &'static str = "<em>";
|
||||||
const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = "</em>";
|
const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = "</em>";
|
||||||
|
|
||||||
pub struct MatcherBuilder {
|
pub struct MatcherBuilder {
|
||||||
matching_words: MatchingWords,
|
matching_words: MatchingWords,
|
||||||
crop_size: usize,
|
|
||||||
crop_marker: Option<String>,
|
crop_marker: Option<String>,
|
||||||
highlight_prefix: Option<String>,
|
highlight_prefix: Option<String>,
|
||||||
highlight_suffix: Option<String>,
|
highlight_suffix: Option<String>,
|
||||||
@ -23,18 +21,7 @@ pub struct MatcherBuilder {
|
|||||||
|
|
||||||
impl MatcherBuilder {
|
impl MatcherBuilder {
|
||||||
pub fn from_matching_words(matching_words: MatchingWords) -> Self {
|
pub fn from_matching_words(matching_words: MatchingWords) -> Self {
|
||||||
Self {
|
Self { matching_words, crop_marker: None, highlight_prefix: None, highlight_suffix: None }
|
||||||
matching_words,
|
|
||||||
crop_size: DEFAULT_CROP_SIZE,
|
|
||||||
crop_marker: None,
|
|
||||||
highlight_prefix: None,
|
|
||||||
highlight_suffix: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn crop_size(&mut self, word_count: usize) -> &Self {
|
|
||||||
self.crop_size = word_count;
|
|
||||||
self
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn crop_marker(&mut self, marker: String) -> &Self {
|
pub fn crop_marker(&mut self, marker: String) -> &Self {
|
||||||
@ -70,7 +57,6 @@ impl MatcherBuilder {
|
|||||||
text,
|
text,
|
||||||
tokens,
|
tokens,
|
||||||
matching_words: &self.matching_words,
|
matching_words: &self.matching_words,
|
||||||
crop_size: self.crop_size,
|
|
||||||
crop_marker,
|
crop_marker,
|
||||||
highlight_prefix,
|
highlight_prefix,
|
||||||
highlight_suffix,
|
highlight_suffix,
|
||||||
@ -79,6 +65,18 @@ impl MatcherBuilder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Default)]
|
||||||
|
pub struct FormatOptions {
|
||||||
|
pub highlight: bool,
|
||||||
|
pub crop: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FormatOptions {
|
||||||
|
pub fn merge(self, other: Self) -> Self {
|
||||||
|
Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct Match {
|
pub struct Match {
|
||||||
match_len: usize,
|
match_len: usize,
|
||||||
@ -100,7 +98,6 @@ pub struct Matcher<'t, 'm> {
|
|||||||
text: &'t str,
|
text: &'t str,
|
||||||
tokens: &'t [Token<'t>],
|
tokens: &'t [Token<'t>],
|
||||||
matching_words: &'m MatchingWords,
|
matching_words: &'m MatchingWords,
|
||||||
crop_size: usize,
|
|
||||||
crop_marker: &'m str,
|
crop_marker: &'m str,
|
||||||
highlight_prefix: &'m str,
|
highlight_prefix: &'m str,
|
||||||
highlight_suffix: &'m str,
|
highlight_suffix: &'m str,
|
||||||
@ -233,7 +230,7 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the bounds in byte index of the crop window.
|
/// Returns the bounds in byte index of the crop window.
|
||||||
fn crop_bounds(&self, matches: &[Match]) -> (usize, usize) {
|
fn crop_bounds(&self, matches: &[Match], crop_size: usize) -> (usize, usize) {
|
||||||
// if there is no match, we start from the beginning of the string by default.
|
// if there is no match, we start from the beginning of the string by default.
|
||||||
let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0);
|
let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0);
|
||||||
let first_match_token_position = matches.first().map(|m| m.token_position).unwrap_or(0);
|
let first_match_token_position = matches.first().map(|m| m.token_position).unwrap_or(0);
|
||||||
@ -241,8 +238,7 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
let last_match_token_position = matches.last().map(|m| m.token_position).unwrap_or(0);
|
let last_match_token_position = matches.last().map(|m| m.token_position).unwrap_or(0);
|
||||||
|
|
||||||
// matches need to be counted in the crop length.
|
// matches need to be counted in the crop length.
|
||||||
let mut remaining_words =
|
let mut remaining_words = crop_size + first_match_word_position - last_match_word_position;
|
||||||
self.crop_size + first_match_word_position - last_match_word_position;
|
|
||||||
|
|
||||||
let mut before_tokens = self.tokens[..first_match_token_position].iter().rev().peekable();
|
let mut before_tokens = self.tokens[..first_match_token_position].iter().rev().peekable();
|
||||||
let mut after_tokens = self.tokens[last_match_token_position..].iter().peekable();
|
let mut after_tokens = self.tokens[last_match_token_position..].iter().peekable();
|
||||||
@ -348,7 +344,7 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the matches interval where the score computed by match_interval_score is maximal.
|
/// Returns the matches interval where the score computed by match_interval_score is maximal.
|
||||||
fn find_best_match_interval<'a>(&self, matches: &'a [Match]) -> &'a [Match] {
|
fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] {
|
||||||
// we compute the matches interval if we have at least 2 matches.
|
// we compute the matches interval if we have at least 2 matches.
|
||||||
if matches.len() > 1 {
|
if matches.len() > 1 {
|
||||||
// positions of the first and the last match of the best matches interval in `matches`.
|
// positions of the first and the last match of the best matches interval in `matches`.
|
||||||
@ -361,9 +357,7 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
// if next match would make interval grow larger than crop_size,
|
// if next match would make interval grow larger than crop_size,
|
||||||
// we compare the current interval with the best one,
|
// we compare the current interval with the best one,
|
||||||
// then we increase `interval_first` until next match can be added.
|
// then we increase `interval_first` until next match can be added.
|
||||||
if next_match.word_position - matches[interval_first].word_position
|
if next_match.word_position - matches[interval_first].word_position >= crop_size {
|
||||||
>= self.crop_size
|
|
||||||
{
|
|
||||||
let interval_score =
|
let interval_score =
|
||||||
self.match_interval_score(&matches[interval_first..=interval_last]);
|
self.match_interval_score(&matches[interval_first..=interval_last]);
|
||||||
|
|
||||||
@ -375,7 +369,7 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
|
|
||||||
// advance start of the interval while interval is longer than crop_size.
|
// advance start of the interval while interval is longer than crop_size.
|
||||||
while next_match.word_position - matches[interval_first].word_position
|
while next_match.word_position - matches[interval_first].word_position
|
||||||
>= self.crop_size
|
>= crop_size
|
||||||
{
|
{
|
||||||
interval_first += 1;
|
interval_first += 1;
|
||||||
}
|
}
|
||||||
@ -397,21 +391,24 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns the formatted version of the original text.
|
// Returns the formatted version of the original text.
|
||||||
pub fn format(&mut self, highlight: bool, crop: bool) -> Cow<'t, str> {
|
pub fn format(&mut self, format_options: FormatOptions) -> Cow<'t, str> {
|
||||||
// If 0 it will be considered null and thus not crop the field
|
if !format_options.highlight && format_options.crop.is_none() {
|
||||||
// https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
|
||||||
let crop = crop && self.crop_size > 0;
|
|
||||||
if !highlight && !crop {
|
|
||||||
// compute matches is not needed if no highlight nor crop is requested.
|
// compute matches is not needed if no highlight nor crop is requested.
|
||||||
Cow::Borrowed(self.text)
|
Cow::Borrowed(self.text)
|
||||||
} else {
|
} else {
|
||||||
match &self.matches {
|
match &self.matches {
|
||||||
Some(matches) => {
|
Some(matches) => {
|
||||||
let matches =
|
let matches = match format_options.crop {
|
||||||
if crop { self.find_best_match_interval(matches) } else { matches };
|
Some(crop_size) if crop_size > 0 => {
|
||||||
|
self.find_best_match_interval(matches, crop_size)
|
||||||
|
}
|
||||||
|
_ => matches,
|
||||||
|
};
|
||||||
|
|
||||||
let (byte_start, byte_end) =
|
let (byte_start, byte_end) = match format_options.crop {
|
||||||
if crop { self.crop_bounds(matches) } else { (0, self.text.len()) };
|
Some(crop_size) if crop_size > 0 => self.crop_bounds(matches, crop_size),
|
||||||
|
_ => (0, self.text.len()),
|
||||||
|
};
|
||||||
|
|
||||||
let mut formatted = Vec::new();
|
let mut formatted = Vec::new();
|
||||||
|
|
||||||
@ -422,7 +419,7 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
|
|
||||||
let mut byte_index = byte_start;
|
let mut byte_index = byte_start;
|
||||||
|
|
||||||
if highlight {
|
if format_options.highlight {
|
||||||
// insert highlight markers around matches.
|
// insert highlight markers around matches.
|
||||||
let tokens = self.tokens;
|
let tokens = self.tokens;
|
||||||
for m in matches {
|
for m in matches {
|
||||||
@ -466,7 +463,7 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
Cow::Owned(formatted.concat())
|
Cow::Owned(formatted.concat())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => self.compute_matches().format(highlight, crop),
|
None => self.compute_matches().format(format_options),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -496,8 +493,7 @@ mod tests {
|
|||||||
let builder = MatcherBuilder::from_matching_words(matching_words);
|
let builder = MatcherBuilder::from_matching_words(matching_words);
|
||||||
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
|
||||||
let highlight = false;
|
let format_options = FormatOptions { highlight: false, crop: None };
|
||||||
let crop = false;
|
|
||||||
|
|
||||||
// Text without any match.
|
// Text without any match.
|
||||||
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
||||||
@ -505,7 +501,7 @@ mod tests {
|
|||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop and no highlight should return complete text.
|
// no crop and no highlight should return complete text.
|
||||||
assert_eq!(&matcher.format(highlight, crop), &text);
|
assert_eq!(&matcher.format(format_options.clone()), &text);
|
||||||
|
|
||||||
// Text containing all matches.
|
// Text containing all matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
||||||
@ -513,7 +509,7 @@ mod tests {
|
|||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop and no highlight should return complete text.
|
// no crop and no highlight should return complete text.
|
||||||
assert_eq!(&matcher.format(highlight, crop), &text);
|
assert_eq!(&matcher.format(format_options.clone()), &text);
|
||||||
|
|
||||||
// Text containing some matches.
|
// Text containing some matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves.";
|
let text = "Natalie risk her future to build a world with the boy she loves.";
|
||||||
@ -521,7 +517,7 @@ mod tests {
|
|||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop and no highlight should return complete text.
|
// no crop and no highlight should return complete text.
|
||||||
assert_eq!(&matcher.format(highlight, crop), &text);
|
assert_eq!(&matcher.format(format_options.clone()), &text);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -531,22 +527,21 @@ mod tests {
|
|||||||
let builder = MatcherBuilder::from_matching_words(matching_words);
|
let builder = MatcherBuilder::from_matching_words(matching_words);
|
||||||
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
|
||||||
let highlight = true;
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
let crop = false;
|
|
||||||
|
|
||||||
// empty text.
|
// empty text.
|
||||||
let text = "";
|
let text = "";
|
||||||
let analyzed = analyzer.analyze(&text);
|
let analyzed = analyzer.analyze(&text);
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
assert_eq!(&matcher.format(highlight, crop), "");
|
assert_eq!(&matcher.format(format_options.clone()), "");
|
||||||
|
|
||||||
// text containing only separators.
|
// text containing only separators.
|
||||||
let text = ":-)";
|
let text = ":-)";
|
||||||
let analyzed = analyzer.analyze(&text);
|
let analyzed = analyzer.analyze(&text);
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
assert_eq!(&matcher.format(highlight, crop), ":-)");
|
assert_eq!(&matcher.format(format_options.clone()), ":-)");
|
||||||
|
|
||||||
// Text without any match.
|
// Text without any match.
|
||||||
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
||||||
@ -554,7 +549,7 @@ mod tests {
|
|||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop should return complete text, because there are no matches.
|
// no crop should return complete text, because there are no matches.
|
||||||
assert_eq!(&matcher.format(highlight, crop), &text);
|
assert_eq!(&matcher.format(format_options.clone()), &text);
|
||||||
|
|
||||||
// Text containing all matches.
|
// Text containing all matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
||||||
@ -562,7 +557,7 @@ mod tests {
|
|||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
assert_eq!(&matcher.format(highlight, crop), "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>.");
|
assert_eq!(&matcher.format(format_options.clone()), "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>.");
|
||||||
|
|
||||||
// Text containing some matches.
|
// Text containing some matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves.";
|
let text = "Natalie risk her future to build a world with the boy she loves.";
|
||||||
@ -571,7 +566,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
|
"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -588,8 +583,7 @@ mod tests {
|
|||||||
let builder = MatcherBuilder::from_matching_words(matching_words);
|
let builder = MatcherBuilder::from_matching_words(matching_words);
|
||||||
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
|
||||||
let highlight = true;
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
let crop = false;
|
|
||||||
|
|
||||||
// Text containing prefix match.
|
// Text containing prefix match.
|
||||||
let text = "Ŵôřlḑôle";
|
let text = "Ŵôřlḑôle";
|
||||||
@ -597,7 +591,7 @@ mod tests {
|
|||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
assert_eq!(&matcher.format(highlight, crop), "<em>Ŵôřlḑ</em>ôle");
|
assert_eq!(&matcher.format(format_options.clone()), "<em>Ŵôřlḑ</em>ôle");
|
||||||
|
|
||||||
// Text containing unicode match.
|
// Text containing unicode match.
|
||||||
let text = "Ŵôřlḑ";
|
let text = "Ŵôřlḑ";
|
||||||
@ -605,7 +599,7 @@ mod tests {
|
|||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
assert_eq!(&matcher.format(highlight, crop), "<em>Ŵôřlḑ</em>");
|
assert_eq!(&matcher.format(format_options.clone()), "<em>Ŵôřlḑ</em>");
|
||||||
|
|
||||||
// Text containing unicode match.
|
// Text containing unicode match.
|
||||||
let text = "Westfália";
|
let text = "Westfália";
|
||||||
@ -613,7 +607,7 @@ mod tests {
|
|||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
assert_eq!(&matcher.format(highlight, crop), "<em>Westfáli</em>a");
|
assert_eq!(&matcher.format(format_options.clone()), "<em>Westfáli</em>a");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -623,22 +617,21 @@ mod tests {
|
|||||||
let builder = MatcherBuilder::from_matching_words(matching_words);
|
let builder = MatcherBuilder::from_matching_words(matching_words);
|
||||||
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
|
||||||
let highlight = false;
|
let format_options = FormatOptions { highlight: false, crop: Some(10) };
|
||||||
let crop = true;
|
|
||||||
|
|
||||||
// empty text.
|
// empty text.
|
||||||
let text = "";
|
let text = "";
|
||||||
let analyzed = analyzer.analyze(&text);
|
let analyzed = analyzer.analyze(&text);
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
assert_eq!(&matcher.format(highlight, crop), "");
|
assert_eq!(&matcher.format(format_options.clone()), "");
|
||||||
|
|
||||||
// text containing only separators.
|
// text containing only separators.
|
||||||
let text = ":-)";
|
let text = ":-)";
|
||||||
let analyzed = analyzer.analyze(&text);
|
let analyzed = analyzer.analyze(&text);
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
assert_eq!(&matcher.format(highlight, crop), ":-)");
|
assert_eq!(&matcher.format(format_options.clone()), ":-)");
|
||||||
|
|
||||||
// Text without any match.
|
// Text without any match.
|
||||||
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
||||||
@ -647,7 +640,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no highlight should return 10 first words with a marker at the end.
|
// no highlight should return 10 first words with a marker at the end.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"A quick brown fox can not jump 32 feet, right…"
|
"A quick brown fox can not jump 32 feet, right…"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -658,7 +651,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no highlight should return 10 first words with a marker at the end.
|
// no highlight should return 10 first words with a marker at the end.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"(A quick brown fox can not jump 32 feet, right…"
|
"(A quick brown fox can not jump 32 feet, right…"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -669,7 +662,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// should crop the phrase instead of cropping around the match.
|
// should crop the phrase instead of cropping around the match.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"…Split The World is a book written by Emily Henry…"
|
"…Split The World is a book written by Emily Henry…"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -680,7 +673,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no highlight should return 10 last words with a marker at the start.
|
// no highlight should return 10 last words with a marker at the start.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"…future to build a world with the boy she loves…"
|
"…future to build a world with the boy she loves…"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -691,7 +684,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// no highlight should return 10 last words with a marker at the start.
|
// no highlight should return 10 last words with a marker at the start.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"…she loves. Emily Henry: The Love That Split The World."
|
"…she loves. Emily Henry: The Love That Split The World."
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -702,7 +695,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// crop should return 10 last words with a marker at the start.
|
// crop should return 10 last words with a marker at the start.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"…void void void void void split the world void void"
|
"…void void void void void split the world void void"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -713,7 +706,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// crop should return 10 last words with a marker at the start.
|
// crop should return 10 last words with a marker at the start.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"…void void void void void split the world void void"
|
"…void void void void void split the world void void"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -724,7 +717,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// crop should return 10 last words with a marker at the start.
|
// crop should return 10 last words with a marker at the start.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"…void void void void void split the world void void"
|
"…void void void void void split the world void void"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -736,22 +729,21 @@ mod tests {
|
|||||||
let builder = MatcherBuilder::from_matching_words(matching_words);
|
let builder = MatcherBuilder::from_matching_words(matching_words);
|
||||||
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
|
||||||
let highlight = true;
|
let format_options = FormatOptions { highlight: true, crop: Some(10) };
|
||||||
let crop = true;
|
|
||||||
|
|
||||||
// empty text.
|
// empty text.
|
||||||
let text = "";
|
let text = "";
|
||||||
let analyzed = analyzer.analyze(&text);
|
let analyzed = analyzer.analyze(&text);
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
assert_eq!(&matcher.format(highlight, crop), "");
|
assert_eq!(&matcher.format(format_options.clone()), "");
|
||||||
|
|
||||||
// text containing only separators.
|
// text containing only separators.
|
||||||
let text = ":-)";
|
let text = ":-)";
|
||||||
let analyzed = analyzer.analyze(&text);
|
let analyzed = analyzer.analyze(&text);
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
assert_eq!(&matcher.format(highlight, crop), ":-)");
|
assert_eq!(&matcher.format(format_options.clone()), ":-)");
|
||||||
|
|
||||||
// Text without any match.
|
// Text without any match.
|
||||||
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
||||||
@ -760,7 +752,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// both should return 10 first words with a marker at the end.
|
// both should return 10 first words with a marker at the end.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"A quick brown fox can not jump 32 feet, right…"
|
"A quick brown fox can not jump 32 feet, right…"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -771,7 +763,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// both should return 10 last words with a marker at the start and highlighted matches.
|
// both should return 10 last words with a marker at the start and highlighted matches.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"…future to build a <em>world</em> with <em>the</em> boy she loves…"
|
"…future to build a <em>world</em> with <em>the</em> boy she loves…"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -781,7 +773,7 @@ mod tests {
|
|||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// both should return 10 last words with a marker at the start and highlighted matches.
|
// both should return 10 last words with a marker at the start and highlighted matches.
|
||||||
assert_eq!(&matcher.format(highlight, crop), "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>.");
|
assert_eq!(&matcher.format(format_options.clone()), "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>.");
|
||||||
|
|
||||||
// Text containing a match unordered and a match ordered.
|
// Text containing a match unordered and a match ordered.
|
||||||
let text = "The world split void void void void void void void void void split the world void void";
|
let text = "The world split void void void void void void void void void split the world void void";
|
||||||
@ -790,7 +782,7 @@ mod tests {
|
|||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// crop should return 10 last words with a marker at the start.
|
// crop should return 10 last words with a marker at the start.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options.clone()),
|
||||||
"…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"
|
"…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -800,33 +792,33 @@ mod tests {
|
|||||||
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
||||||
let matching_words = matching_words();
|
let matching_words = matching_words();
|
||||||
|
|
||||||
let mut builder = MatcherBuilder::from_matching_words(matching_words);
|
let builder = MatcherBuilder::from_matching_words(matching_words);
|
||||||
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
|
||||||
let highlight = false;
|
|
||||||
let crop = true;
|
|
||||||
|
|
||||||
let text = "void void split the world void void.";
|
let text = "void void split the world void void.";
|
||||||
let analyzed = analyzer.analyze(&text);
|
let analyzed = analyzer.analyze(&text);
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
let tokens: Vec<_> = analyzed.tokens().collect();
|
||||||
|
|
||||||
// set a smaller crop size
|
// set a smaller crop size
|
||||||
builder.crop_size(2);
|
let format_options = FormatOptions { highlight: false, crop: Some(2) };
|
||||||
|
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// because crop size < query size, partially format matches.
|
// because crop size < query size, partially format matches.
|
||||||
assert_eq!(&matcher.format(highlight, crop), "…split the…");
|
assert_eq!(&matcher.format(format_options), "…split the…");
|
||||||
|
|
||||||
// set a smaller crop size
|
// set a smaller crop size
|
||||||
builder.crop_size(1);
|
let format_options = FormatOptions { highlight: false, crop: Some(1) };
|
||||||
|
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// because crop size < query size, partially format matches.
|
// because crop size < query size, partially format matches.
|
||||||
assert_eq!(&matcher.format(highlight, crop), "…split…");
|
assert_eq!(&matcher.format(format_options), "…split…");
|
||||||
|
|
||||||
|
// set crop size to 0
|
||||||
|
let format_options = FormatOptions { highlight: false, crop: Some(0) };
|
||||||
|
|
||||||
// set a smaller crop size
|
|
||||||
builder.crop_size(0);
|
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
// because crop size is 0, crop is ignored.
|
// because crop size is 0, crop is ignored.
|
||||||
assert_eq!(&matcher.format(highlight, crop), "void void split the world void void.");
|
assert_eq!(&matcher.format(format_options), "void void split the world void void.");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -858,8 +850,7 @@ mod tests {
|
|||||||
builder.highlight_suffix("_".to_string());
|
builder.highlight_suffix("_".to_string());
|
||||||
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
|
||||||
let highlight = true;
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
let crop = false;
|
|
||||||
|
|
||||||
let text = "the do or die can't be he do and or isn't he";
|
let text = "the do or die can't be he do and or isn't he";
|
||||||
let analyzed = analyzer.analyze(&text);
|
let analyzed = analyzer.analyze(&text);
|
||||||
@ -867,7 +858,7 @@ mod tests {
|
|||||||
|
|
||||||
let mut matcher = builder.build(&tokens[..], text);
|
let mut matcher = builder.build(&tokens[..], text);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&matcher.format(highlight, crop),
|
&matcher.format(format_options),
|
||||||
"_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_",
|
"_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_",
|
||||||
"matches: {:?}",
|
"matches: {:?}",
|
||||||
&matcher.matches
|
&matcher.matches
|
||||||
|
@ -17,7 +17,7 @@ use roaring::bitmap::RoaringBitmap;
|
|||||||
|
|
||||||
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter};
|
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter};
|
||||||
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||||
pub use self::matches::{MatchBounds, Matcher, MatcherBuilder, MatchingWords};
|
pub use self::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
|
||||||
use self::query_tree::QueryTreeBuilder;
|
use self::query_tree::QueryTreeBuilder;
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||||
|
Loading…
Reference in New Issue
Block a user