Refactor, handle more cases for phrases
parent 00ccf53ffa
commit eabc14c268
@@ -181,7 +181,7 @@ impl<'a> PartialMatch<'a> {
         // return a new Partial match allowing the highlighter to continue.
         if is_matching && matching_words.len() > 1 {
             matching_words.remove(0);
-            Some(MatchType::Partial(PartialMatch { matching_words, ids, char_len }))
+            Some(MatchType::Partial(Self { matching_words, ids, char_len }))
         // if there is no remaining word to match in the phrase and the current token is matching,
         // return a Full match.
         } else if is_matching {
@@ -1,6 +1,6 @@
 use std::borrow::Cow;

-use charabia::{Language, SeparatorKind, Token, Tokenizer};
+use charabia::{Language, SeparatorKind, Token, TokenKind, Tokenizer};
 pub use matching_words::MatchingWords;
 use matching_words::{MatchType, PartialMatch, WordId};
 use serde::Serialize;
@@ -145,6 +145,13 @@ impl Match {
             MatchPosition::Phrase { token_positions: (_, ltp), .. } => ltp,
         }
     }
+
+    fn get_word_count(&self) -> usize {
+        match self.position {
+            MatchPosition::Word { .. } => 1,
+            MatchPosition::Phrase { word_positions: (fwp, lwp), .. } => lwp - fwp + 1,
+        }
+    }
 }

 #[derive(Serialize, Debug, Clone, PartialEq, Eq)]
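The new `get_word_count` helper counts both bounds of a phrase inclusively, hence the `lwp - fwp + 1`. A standalone sketch with simplified stand-in types (not the crate's actual `Match`/`MatchPosition` definitions) to show the arithmetic:

enum Position {
    Word { word_position: usize },
    Phrase { word_positions: (usize, usize) },
}

fn word_count(position: &Position) -> usize {
    match position {
        // a single-word match always covers exactly one word
        Position::Word { .. } => 1,
        // a phrase spans an inclusive range of word positions, hence the `+ 1`
        Position::Phrase { word_positions: (first, last) } => last - first + 1,
    }
}

fn main() {
    // a phrase occupying word positions 7 through 9 counts as 3 words
    assert_eq!(word_count(&Position::Phrase { word_positions: (7, 9) }), 3);
    assert_eq!(word_count(&Position::Word { word_position: 4 }), 1);
}

This count is what the reworked `find_best_match_interval` below uses to reject a lone phrase match that is itself longer than the crop window.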
@@ -153,6 +160,27 @@ pub struct MatchBounds {
     pub length: usize,
 }

+enum SimpleTokenKind {
+    Separator(SeparatorKind),
+    NotSeparator,
+}
+
+impl SimpleTokenKind {
+    fn get(token: &&Token<'_>) -> Self {
+        match token.kind {
+            TokenKind::Separator(separator_kind) => Self::Separator(separator_kind),
+            _ => Self::NotSeparator,
+        }
+    }
+
+    fn is_not_separator(&self) -> bool {
+        match self {
+            SimpleTokenKind::NotSeparator => true,
+            SimpleTokenKind::Separator(_) => false,
+        }
+    }
+}
+
 /// Structure used to analyze a string, compute words that match,
 /// and format the source string, returning a highlighted and cropped sub-string.
 pub struct Matcher<'t, 'tokenizer, 'b, 'lang> {
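`SimpleTokenKind` collapses charabia's token classification down to the one distinction the crop logic needs: separator (hard or soft) versus anything else. Taking `&&Token<'_>` in `get` lets it be passed straight to `.peek().map(SimpleTokenKind::get)` without an extra closure. A self-contained sketch of the same idea with stand-in types (charabia's real `Token` and `TokenKind` carry much more information):

#[derive(Clone, Copy, PartialEq, Debug)]
enum SeparatorKind {
    Hard, // e.g. sentence punctuation
    Soft, // e.g. whitespace
}

#[derive(Clone, Copy)]
enum TokenKind {
    Word,
    Separator(SeparatorKind),
}

enum SimpleTokenKind {
    Separator(SeparatorKind),
    NotSeparator,
}

impl SimpleTokenKind {
    fn get(kind: &TokenKind) -> Self {
        match kind {
            TokenKind::Separator(separator_kind) => Self::Separator(*separator_kind),
            _ => Self::NotSeparator,
        }
    }
}

fn main() {
    // words (and any other non-separator token) collapse to NotSeparator
    assert!(matches!(SimpleTokenKind::get(&TokenKind::Word), SimpleTokenKind::NotSeparator));
    // separators keep their hard/soft distinction
    assert!(matches!(
        SimpleTokenKind::get(&TokenKind::Separator(SeparatorKind::Hard)),
        SimpleTokenKind::Separator(SeparatorKind::Hard)
    ));
}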
@@ -287,95 +315,130 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
         crop_size: usize,
     ) -> (usize, usize) {
         // if there is no match, we start from the beginning of the string by default.
-        let first_match_word_position =
+        let first_match_first_word_position =
             matches.first().map(|m| m.get_first_word_pos()).unwrap_or(0);
-        let first_match_token_position =
+        let first_match_first_token_position =
             matches.first().map(|m| m.get_first_token_pos()).unwrap_or(0);
-        let last_match_word_position = matches.last().map(|m| m.get_last_word_pos()).unwrap_or(0);
-        let last_match_token_position = matches.last().map(|m| m.get_last_token_pos()).unwrap_or(0);
+        let last_match_last_word_position =
+            matches.last().map(|m| m.get_last_word_pos()).unwrap_or(0);
+        let last_match_last_token_position =
+            matches.last().map(|m| m.get_last_token_pos()).unwrap_or(0);

-        // matches need to be counted in the crop len.
-        let mut remaining_words = crop_size + first_match_word_position - last_match_word_position;
-
-        // create the initial state of the crop window: 2 iterators starting from the matches positions,
-        // a reverse iterator starting from the first match token position and going towards the beginning of the text,
-        let mut before_tokens = tokens[..first_match_token_position].iter().rev().peekable();
-        // an iterator starting from the last match token position and going towards the end of the text.
-        let mut after_tokens = tokens[last_match_token_position..].iter().peekable();
-
-        // grows the crop window peeking in both directions
-        // until the window contains the right number of words:
-        while remaining_words > 0 {
-            let before_token = before_tokens.peek().map(|t| t.separator_kind());
-            let after_token = after_tokens.peek().map(|t| t.separator_kind());
-
-            match (before_token, after_token) {
-                // we can expand both sides.
-                (Some(before_token), Some(after_token)) => {
-                    match (before_token, after_token) {
-                        // if they are both separators and are the same kind then advance both,
-                        // or expand on the soft separator side.
-                        (Some(before_token_kind), Some(after_token_kind)) => {
-                            if before_token_kind == after_token_kind {
-                                before_tokens.next();
-
-                                // this avoids having an ending separator before the crop marker.
-                                if remaining_words > 1 {
-                                    after_tokens.next();
-                                }
-                            } else if before_token_kind == SeparatorKind::Hard {
-                                after_tokens.next();
-                            } else {
-                                before_tokens.next();
-                            }
-                        }
-                        // if one of the tokens is a word, we expand on the side of the word.
-                        // left is a word, advance left.
-                        (None, Some(_)) => {
-                            before_tokens.next();
-                            remaining_words -= 1;
-                        }
-                        // right is a word, advance right.
-                        (Some(_), None) => {
-                            after_tokens.next();
-                            remaining_words -= 1;
-                        }
-                        // both are words, advance left then right if remaining_words > 0.
-                        (None, None) => {
-                            before_tokens.next();
-                            remaining_words -= 1;
-
-                            if remaining_words > 0 {
-                                after_tokens.next();
-                                remaining_words -= 1;
-                            }
-                        }
-                    }
-                }
-                // the end of the text is reached, advance left.
-                (Some(before_token), None) => {
-                    before_tokens.next();
-                    if before_token.is_none() {
-                        remaining_words -= 1;
-                    }
-                }
-                // the start of the text is reached, advance right.
-                (None, Some(after_token)) => {
-                    after_tokens.next();
-                    if after_token.is_none() {
-                        remaining_words -= 1;
-                    }
-                }
-                // no more token to add.
-                (None, None) => break,
-            }
-        }
-
-        // finally, keep the byte index of each bound of the crop window.
-        let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end);
-        let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start);
-
-        (crop_byte_start, crop_byte_end)
+        let matches_window_len =
+            last_match_last_word_position - first_match_first_word_position + 1;
+
+        if crop_size >= matches_window_len {
+            // matches need to be counted in the crop len.
+            let mut remaining_words = crop_size - matches_window_len;
+
+            // create the initial state of the crop window: 2 iterators starting from the matches positions,
+            // a reverse iterator starting from the first match token position and going towards the beginning of the text,
+            let mut before_tokens =
+                tokens[..first_match_first_token_position].iter().rev().peekable();
+            // an iterator starting from the last match token position and going towards the end of the text.
+            let mut after_tokens = tokens[last_match_last_token_position + 1..].iter().peekable();
+
+            // grows the crop window peeking in both directions
+            // until the window contains the right number of words:
+            while remaining_words > 0 {
+                let before_token_kind = before_tokens.peek().map(SimpleTokenKind::get);
+                let after_token_kind = after_tokens.peek().map(SimpleTokenKind::get);
+
+                match (before_token_kind, after_token_kind) {
+                    // we can expand both sides.
+                    (Some(before_token_kind), Some(after_token_kind)) => {
+                        match (before_token_kind, after_token_kind) {
+                            // if they are both separators and are the same kind then advance both,
+                            // or expand on the soft separator side.
+                            (
+                                SimpleTokenKind::Separator(before_token_separator_kind),
+                                SimpleTokenKind::Separator(after_token_separator_kind),
+                            ) => {
+                                if before_token_separator_kind == after_token_separator_kind {
+                                    before_tokens.next();
+
+                                    // this avoids having an ending separator before the crop marker.
+                                    if remaining_words > 1 {
+                                        after_tokens.next();
+                                    }
+                                } else if let SeparatorKind::Hard = before_token_separator_kind {
+                                    after_tokens.next();
+                                } else {
+                                    before_tokens.next();
+                                }
+                            }
+                            // if one of the tokens is a word, we expand on the side of the word.
+                            // left is a word, advance left.
+                            (SimpleTokenKind::NotSeparator, SimpleTokenKind::Separator(_)) => {
+                                before_tokens.next();
+                                remaining_words -= 1;
+                            }
+                            // right is a word, advance right.
+                            (SimpleTokenKind::Separator(_), SimpleTokenKind::NotSeparator) => {
+                                after_tokens.next();
+                                remaining_words -= 1;
+                            }
+                            // both are words, advance left then right if remaining_words > 0.
+                            (SimpleTokenKind::NotSeparator, SimpleTokenKind::NotSeparator) => {
+                                before_tokens.next();
+                                remaining_words -= 1;
+
+                                if remaining_words > 0 {
+                                    after_tokens.next();
+                                    remaining_words -= 1;
+                                }
+                            }
+                        }
+                    }
+                    // the end of the text is reached, advance left.
+                    (Some(before_token_kind), None) => {
+                        before_tokens.next();
+                        if let SimpleTokenKind::NotSeparator = before_token_kind {
+                            remaining_words -= 1;
+                        }
+                    }
+                    // the start of the text is reached, advance right.
+                    (None, Some(after_token_kind)) => {
+                        after_tokens.next();
+                        if let SimpleTokenKind::NotSeparator = after_token_kind {
+                            remaining_words -= 1;
+                        }
+                    }
+                    // no more token to add.
+                    (None, None) => break,
+                }
+            }
+
+            // finally, keep the byte index of each bound of the crop window.
+            let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end);
+            let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start);
+
+            (crop_byte_start, crop_byte_end)
+        } else {
+            // the matches window is longer than the crop window, so we have to advance inward
+            let mut remaining_extra_words = matches_window_len - crop_size;
+            let mut tokens_from_end =
+                tokens[..=last_match_last_token_position].iter().rev().peekable();
+
+            while remaining_extra_words > 0 {
+                let token_from_end_kind =
+                    tokens_from_end.peek().map(SimpleTokenKind::get).expect("TODO");
+                if token_from_end_kind.is_not_separator() {
+                    remaining_extra_words -= 1;
+                }
+
+                tokens_from_end.next();
+            }
+
+            let crop_byte_start = if first_match_first_token_position > 0 {
+                &tokens[first_match_first_token_position - 1].byte_end
+            } else {
+                &0
+            };
+            let crop_byte_end = tokens_from_end.next().map(|t| t.byte_start).expect("TODO");
+
+            (*crop_byte_start, crop_byte_end)
+        }
     }

     /// Compute the score of a match interval:
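The refactored `crop_bounds` now has two regimes: when `crop_size` can contain the whole matches window, it grows the window outward as before; otherwise (the new `else` branch) it trims tokens inward from the end of the match. A standalone sketch of the outward growth over a simplified token model; the real code works on charabia tokens and byte offsets, and also applies the hard/soft separator tie-breaking omitted here:

#[derive(Clone, Copy, PartialEq)]
enum Tok {
    Word,
    Sep, // any separator; hard/soft preference is omitted in this sketch
}

/// Returns the token range `start..end` of the crop window around the
/// match span `first..=last`, aiming for `crop_size` words in total.
fn grow_window(tokens: &[Tok], first: usize, last: usize, crop_size: usize) -> (usize, usize) {
    let words_in_match = tokens[first..=last].iter().filter(|t| **t == Tok::Word).count();
    let mut remaining_words = crop_size.saturating_sub(words_in_match);
    let (mut start, mut end) = (first, last + 1);

    while remaining_words > 0 {
        let before = start.checked_sub(1).map(|i| tokens[i]);
        let after = tokens.get(end).copied();
        match (before, after) {
            // both sides can still grow: expand left, then right,
            // only counting words (separators are absorbed for free)
            (Some(b), Some(_)) => {
                if b == Tok::Word {
                    remaining_words -= 1;
                }
                start -= 1;
                if remaining_words > 0 {
                    if tokens[end] == Tok::Word {
                        remaining_words -= 1;
                    }
                    end += 1;
                }
            }
            // the end of the text is reached, only grow left
            (Some(b), None) => {
                if b == Tok::Word {
                    remaining_words -= 1;
                }
                start -= 1;
            }
            // the start of the text is reached, only grow right
            (None, Some(a)) => {
                if a == Tok::Word {
                    remaining_words -= 1;
                }
                end += 1;
            }
            // no more tokens to add on either side
            (None, None) => break,
        }
    }
    (start, end)
}

fn main() {
    use Tok::*;
    // seven tokens: word, sep, word, sep, [word], sep, word; the match is
    // the single word at index 4, and we want a 3-word crop window
    let tokens = [Word, Sep, Word, Sep, Word, Sep, Word];
    assert_eq!(grow_window(&tokens, 4, 4, 3), (2, 7));
}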
@@ -416,11 +479,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
                     lwp
                 }
             };
-            let next_match_first_word_pos = match next_match.position {
-                MatchPosition::Word { word_position, .. } => word_position,
-                MatchPosition::Phrase { word_positions: (fwp, _), .. } => fwp,
-            };
+            let next_match_first_word_pos = next_match.get_first_word_pos();

             // compute distance between matches
             distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16;
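The scoring line itself is unchanged; only the way `next_match_first_word_pos` is obtained moves into the `get_first_word_pos` helper. For reference, a tiny runnable example of the capped distance penalty that this feeds (the positions are made up):

fn main() {
    // matches ending at word position 3 and starting at word position 12
    // are 9 words apart, but the penalty is clamped to 7 so very distant
    // matches cannot dominate the interval score
    let (m_last_word_pos, next_match_first_word_pos) = (3usize, 12usize);
    let mut distance_score: i16 = 0;
    distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16;
    assert_eq!(distance_score, -7);
}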
@@ -443,72 +502,96 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
     /// Returns the matches interval where the score computed by match_interval_score is the best.
     fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] {
         let matches_len = matches.len();
-        // we compute the matches interval if we have at least 2 matches.
-        if matches_len > 1 {
-            // current interval positions.
-            let mut interval_first = 0;
-            // positions of the first and the last match of the best matches interval in `matches`.
-            let mut best_interval = (0, 0);
-            let mut best_interval_score = self.match_interval_score(&matches[0..=0]);
-
-            let mut index = 1;
-            while index < matches_len - 1 {
-                let next_match = &matches[index];
-
-                // if next match would make the interval grow more than crop_size,
-                // we compare the current interval with the best one,
-                // then we increase `interval_first` until next match can be added.
-                let next_match_last_word_pos = next_match.get_last_word_pos();
-                let interval_first_match_first_word_pos =
-                    matches[interval_first].get_first_word_pos();
-
-                // if the next match would mean that we pass the crop size window,
-                // we take the last valid match, that didn't pass this boundary, which is `index` - 1,
-                // and calculate a score for it, and check if it's better than our best so far
-                if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size {
-                    // skip for 1, because it would result in the same as our very first interval score
-                    if index != 1 {
-                        let interval_last = index - 1;
-                        let interval_score =
-                            self.match_interval_score(&matches[interval_first..=interval_last]);
-
-                        // keep interval if it's the best
-                        if interval_score > best_interval_score {
-                            best_interval = (interval_first, interval_last);
-                            best_interval_score = interval_score;
-                        }
-                    }
-
-                    // advance start of the interval while interval is longer than crop_size.
-                    loop {
-                        interval_first += 1;
-                        let interval_first_match_first_word_pos =
-                            matches[interval_first].get_first_word_pos();
-
-                        if next_match_last_word_pos - interval_first_match_first_word_pos
-                            < crop_size
-                        {
-                            break;
-                        }
-                    }
-                }
-
-                index += 1;
-            }
-
-            // compute the last interval score and compare it to the best one.
-            let interval_last = matches_len - 1;
-            let interval_score =
-                self.match_interval_score(&matches[interval_first..=interval_last]);
-            if interval_score > best_interval_score {
-                best_interval = (interval_first, interval_last);
-            }
-
-            &matches[best_interval.0..=best_interval.1]
-        } else {
-            matches
-        }
+        if matches_len <= 1 {
+            return matches;
+        }
+
+        // positions of the first and the last match of the best matches interval in `matches`.
+        struct BestInterval {
+            interval: (usize, usize),
+            score: (i16, i16, i16),
+        }
+
+        fn save_best_interval(
+            best_interval: &mut Option<BestInterval>,
+            interval_first: usize,
+            interval_last: usize,
+            interval_score: (i16, i16, i16),
+        ) {
+            if let Some(best_interval) = best_interval {
+                if interval_score > best_interval.score {
+                    best_interval.interval = (interval_first, interval_last);
+                    best_interval.score = interval_score;
+                }
+            } else {
+                *best_interval = Some(BestInterval {
+                    interval: (interval_first, interval_last),
+                    score: interval_score,
+                });
+            }
+        }
+
+        let mut best_interval: Option<BestInterval> = None;
+
+        // we compute the matches interval if we have at least 2 matches.
+        // current interval positions.
+        let mut interval_first = 0;
+        let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos();
+
+        for (index, next_match) in matches.iter().enumerate() {
+            // if next match would make the interval grow more than crop_size,
+            // we compare the current interval with the best one,
+            // then we increase `interval_first` until next match can be added.
+            let next_match_last_word_pos = next_match.get_last_word_pos();
+
+            // if the next match would mean that we pass the crop size window,
+            // we take the last valid match, that didn't pass this boundary, which is `index` - 1,
+            // and calculate a score for it, and check if it's better than our best so far
+            if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size {
+                // if index is 0 there is no last viable match
+                if index != 0 {
+                    let interval_last = index - 1;
+                    let interval_score =
+                        self.match_interval_score(&matches[interval_first..=interval_last]);
+
+                    // keep interval if it's the best
+                    save_best_interval(
+                        &mut best_interval,
+                        interval_first,
+                        interval_last,
+                        interval_score,
+                    );
+                }
+
+                // advance start of the interval while interval is longer than crop_size.
+                loop {
+                    interval_first += 1;
+                    interval_first_match_first_word_pos =
+                        matches[interval_first].get_first_word_pos();
+
+                    if interval_first_match_first_word_pos > next_match_last_word_pos
+                        || next_match_last_word_pos - interval_first_match_first_word_pos
+                            < crop_size
+                    {
+                        break;
+                    }
+                }
+            }
+        }
+
+        // compute the last interval score and compare it to the best one.
+        let interval_last = matches_len - 1;
+        // if it's the last match with itself, we need to make sure it's
+        // not a phrase longer than the crop window
+        if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size {
+            let interval_score =
+                self.match_interval_score(&matches[interval_first..=interval_last]);
+            save_best_interval(&mut best_interval, interval_first, interval_last, interval_score);
+        }
+
+        // if none of the matches fit the criteria above, default to the first one
+        let best_interval = best_interval.map_or((0, 0), |v| v.interval);
+        &matches[best_interval.0..=best_interval.1]
     }

     // Returns the formatted version of the original text.
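The rewritten `find_best_match_interval` is a sliding window over matches: the right edge advances match by match, and whenever the window would exceed `crop_size` words, the interval ending just before is scored and the left edge is pulled forward. A standalone sketch over bare `(first_word_pos, last_word_pos)` pairs, with a stand-in score (the real code uses the three-part `(i16, i16, i16)` score from `match_interval_score`):

fn find_best_interval(word_positions: &[(usize, usize)], crop_size: usize) -> (usize, usize) {
    if word_positions.len() <= 1 {
        return (0, word_positions.len().saturating_sub(1));
    }

    // (interval, score); the stand-in score is just the interval length
    let mut best: Option<((usize, usize), usize)> = None;
    fn save(best: &mut Option<((usize, usize), usize)>, first: usize, last: usize) {
        let score = last - first + 1;
        if best.map_or(true, |(_, s)| score > s) {
            *best = Some(((first, last), score));
        }
    }

    let mut interval_first = 0;
    for (index, &(_, last_word_pos)) in word_positions.iter().enumerate() {
        // the next match would overflow the crop window: score the interval
        // that ends just before it, then advance the left edge until it fits
        if last_word_pos - word_positions[interval_first].0 >= crop_size {
            if index != 0 {
                save(&mut best, interval_first, index - 1);
            }
            while interval_first < index
                && last_word_pos - word_positions[interval_first].0 >= crop_size
            {
                interval_first += 1;
            }
        }
    }
    // score the final interval too, then fall back to the first match
    save(&mut best, interval_first, word_positions.len() - 1);
    best.map_or((0, 0), |(interval, _)| interval)
}

fn main() {
    // matches at word positions 0, 2, and 20..21; a crop window of
    // 10 words can hold the first two matches but not the third
    let matches = [(0, 0), (2, 2), (20, 21)];
    assert_eq!(find_best_interval(&matches, 10), (0, 1));
}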
@@ -928,98 +1011,98 @@ mod tests {

         let format_options = FormatOptions { highlight: true, crop: Some(10) };

-        // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
-        // let mut matcher = builder.build(text, None);
-        // // should return 10 words with a marker at the start as well as the end, and the highlighted matches.
-        // insta::assert_snapshot!(
-        //     matcher.format(format_options),
-        //     @"…the power to split <em>the world</em> between those who embraced…"
-        // );
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
+        let mut matcher = builder.build(text, None);
+        // should return 10 words with a marker at the start as well as the end, and the highlighted matches.
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…the power to split <em>the world</em> between those who embraced…"
+        );

-        // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"power to\" \"and those\"");
-        // let mut matcher = builder.build(text, None);
-        // // should highlight "those" and the phrase "and those".
-        // insta::assert_snapshot!(
-        //     matcher.format(format_options),
-        //     @"…groundbreaking invention had the <em>power to</em> split the world between…"
-        // );
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"power to\" \"and those\"");
+        let mut matcher = builder.build(text, None);
+        // should highlight "those" and the phrase "and those".
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…groundbreaking invention had the <em>power to</em> split the world between…"
+        );

-        // let builder = MatcherBuilder::new_test(
-        //     &rtxn,
-        //     &temp_index,
-        //     "\"The groundbreaking invention had the power to split the world\"",
-        // );
-        // let mut matcher = builder.build(text, None);
-        // insta::assert_snapshot!(
-        //     matcher.format(format_options),
-        //     @"<em>The groundbreaking invention had the power to split the world</em>…"
-        // );
-
         let builder = MatcherBuilder::new_test(
             &rtxn,
             &temp_index,
-            "\"The groundbreaking invention had the power to split the world between\"",
+            "\"The groundbreaking invention had the power to split the world\"",
         );
         let mut matcher = builder.build(text, None);
         insta::assert_snapshot!(
             matcher.format(format_options),
-            @"The groundbreaking invention had the power to split the world …"
+            @"<em>The groundbreaking invention had the power to split the world</em>…"
         );

-        // let builder = MatcherBuilder::new_test(
-        //     &rtxn,
-        //     &temp_index,
-        //     "\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
-        // );
-        // let mut matcher = builder.build(text, None);
-        // insta::assert_snapshot!(
-        //     matcher.format(format_options),
-        //     @"…between those who <em>embraced progress and those who resisted change</em>…"
-        // );
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"The groundbreaking invention had the power to split the world between those\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"The groundbreaking invention had the power to split the world…"
+        );

-        // let builder = MatcherBuilder::new_test(
-        //     &rtxn,
-        //     &temp_index,
-        //     "\"The groundbreaking invention\" \"split the world between those\"",
-        // );
-        // let mut matcher = builder.build(text, None);
-        // insta::assert_snapshot!(
-        //     matcher.format(format_options),
-        //     @"…the power to <em>split the world between those</em> who embraced…"
-        // );
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…between those who <em>embraced progress and those who resisted change</em>…"
+        );

-        // let builder = MatcherBuilder::new_test(
-        //     &rtxn,
-        //     &temp_index,
-        //     "\"groundbreaking invention\" \"split the world between\"",
-        // );
-        // let mut matcher = builder.build(text, None);
-        // insta::assert_snapshot!(
-        //     matcher.format(format_options),
-        //     @"…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"
-        // );
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"The groundbreaking invention\" \"split the world between those\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…the power to <em>split the world between those</em> who embraced…"
+        );

-        // let builder = MatcherBuilder::new_test(
-        //     &rtxn,
-        //     &temp_index,
-        //     "\"groundbreaking invention\" \"had the power to split the world between those\"",
-        // );
-        // let mut matcher = builder.build(text, None);
-        // insta::assert_snapshot!(
-        //     matcher.format(format_options),
-        //     @"…invention <em>had the power to split the world between those</em>…"
-        // );
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"groundbreaking invention\" \"split the world between\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"
+        );

-        // let builder = MatcherBuilder::new_test(
-        //     &rtxn,
-        //     &temp_index,
-        //     "\"The groundbreaking invention\" \"had the power to split the world between those\"",
-        // );
-        // let mut matcher = builder.build(text, None);
-        // insta::assert_snapshot!(
-        //     matcher.format(format_options),
-        //     @"…invention <em>had the power to split the world between those</em>…"
-        // );
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"groundbreaking invention\" \"had the power to split the world between those\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…invention <em>had the power to split the world between those</em>…"
+        );
+
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"The groundbreaking invention\" \"had the power to split the world between those\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…invention <em>had the power to split the world between those</em>…"
+        );
     }

     #[test]
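All of the previously commented-out snapshot assertions are now live. They use insta's inline snapshots, where the expected output is stored in the source after `@`. A minimal self-contained example of the mechanism (assuming `insta` as a dev-dependency; not tied to the crate's test helpers):

#[cfg(test)]
mod snapshot_example {
    fn crop_demo() -> String {
        // stand-in for `matcher.format(format_options)`
        format!("…{}…", "split <em>the world</em> between")
    }

    #[test]
    fn inline_snapshot() {
        // the string after `@` is the stored expectation; running
        // `cargo insta review` rewrites it in place when accepted
        insta::assert_snapshot!(crop_demo(), @"…split <em>the world</em> between…");
    }
}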