mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Make the matcher consume the search context
This commit is contained in:
parent
13b7c826c1
commit
ebe23b04c9
@ -23,9 +23,9 @@ pub struct LocatedMatchingWords {
|
|||||||
|
|
||||||
/// Structure created from a query tree
|
/// Structure created from a query tree
|
||||||
/// referencing words that match the given query tree.
|
/// referencing words that match the given query tree.
|
||||||
pub struct MatchingWords<'ctx> {
|
pub struct MatchingWords {
|
||||||
word_interner: &'ctx DedupInterner<String>,
|
word_interner: DedupInterner<String>,
|
||||||
phrase_interner: &'ctx DedupInterner<Phrase>,
|
phrase_interner: DedupInterner<Phrase>,
|
||||||
phrases: Vec<LocatedMatchingPhrase>,
|
phrases: Vec<LocatedMatchingPhrase>,
|
||||||
words: Vec<LocatedMatchingWords>,
|
words: Vec<LocatedMatchingWords>,
|
||||||
}
|
}
|
||||||
@ -82,8 +82,8 @@ fn extract_matching_terms(term: &QueryTerm) -> (Vec<Interned<Phrase>>, Vec<Inter
|
|||||||
(matching_phrases, matching_words)
|
(matching_phrases, matching_words)
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'ctx> MatchingWords<'ctx> {
|
impl MatchingWords {
|
||||||
pub fn new(ctx: &'ctx SearchContext, located_terms: Vec<LocatedQueryTerm>) -> Self {
|
pub fn new(ctx: SearchContext, located_terms: Vec<LocatedQueryTerm>) -> Self {
|
||||||
let mut phrases = Vec::new();
|
let mut phrases = Vec::new();
|
||||||
let mut words = Vec::new();
|
let mut words = Vec::new();
|
||||||
|
|
||||||
@ -112,18 +112,18 @@ impl<'ctx> MatchingWords<'ctx> {
|
|||||||
Self {
|
Self {
|
||||||
phrases,
|
phrases,
|
||||||
words,
|
words,
|
||||||
word_interner: &ctx.word_interner,
|
word_interner: ctx.word_interner,
|
||||||
phrase_interner: &ctx.phrase_interner,
|
phrase_interner: ctx.phrase_interner,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns an iterator over terms that match or partially match the given token.
|
/// Returns an iterator over terms that match or partially match the given token.
|
||||||
pub fn match_token<'b>(&'ctx self, token: &'b Token<'b>) -> MatchesIter<'ctx, 'b> {
|
pub fn match_token<'a, 'b>(&'a self, token: &'b Token<'b>) -> MatchesIter<'a, 'b> {
|
||||||
MatchesIter { matching_words: self, phrases: Box::new(self.phrases.iter()), token }
|
MatchesIter { matching_words: self, phrases: Box::new(self.phrases.iter()), token }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Try to match the token with one of the located_words.
|
/// Try to match the token with one of the located_words.
|
||||||
fn match_unique_words(&'ctx self, token: &Token) -> Option<MatchType<'ctx>> {
|
fn match_unique_words<'a>(&'a self, token: &Token) -> Option<MatchType<'a>> {
|
||||||
for located_words in &self.words {
|
for located_words in &self.words {
|
||||||
for word in &located_words.value {
|
for word in &located_words.value {
|
||||||
let word = self.word_interner.get(*word);
|
let word = self.word_interner.get(*word);
|
||||||
@ -148,7 +148,7 @@ impl<'ctx> MatchingWords<'ctx> {
|
|||||||
/// Iterator over terms that match the given token,
|
/// Iterator over terms that match the given token,
|
||||||
/// This allows matches to be evaluated lazily.
|
/// This allows matches to be evaluated lazily.
|
||||||
pub struct MatchesIter<'a, 'b> {
|
pub struct MatchesIter<'a, 'b> {
|
||||||
matching_words: &'a MatchingWords<'a>,
|
matching_words: &'a MatchingWords,
|
||||||
phrases: Box<dyn Iterator<Item = &'a LocatedMatchingPhrase> + 'a>,
|
phrases: Box<dyn Iterator<Item = &'a LocatedMatchingPhrase> + 'a>,
|
||||||
token: &'b Token<'b>,
|
token: &'b Token<'b>,
|
||||||
}
|
}
|
||||||
@ -268,7 +268,7 @@ pub(crate) mod tests {
|
|||||||
let tokenizer = TokenizerBuilder::new().build();
|
let tokenizer = TokenizerBuilder::new().build();
|
||||||
let tokens = tokenizer.tokenize("split this world");
|
let tokens = tokenizer.tokenize("split this world");
|
||||||
let query_terms = located_query_terms_from_string(&mut ctx, tokens, None).unwrap();
|
let query_terms = located_query_terms_from_string(&mut ctx, tokens, None).unwrap();
|
||||||
let matching_words = MatchingWords::new(&ctx, query_terms);
|
let matching_words = MatchingWords::new(ctx, query_terms);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
matching_words
|
matching_words
|
||||||
|
@ -14,17 +14,17 @@ const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
|
|||||||
const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";
|
const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";
|
||||||
|
|
||||||
/// Structure used to build a Matcher, allowing customization of the formatting tags.
|
/// Structure used to build a Matcher, allowing customization of the formatting tags.
|
||||||
pub struct MatcherBuilder<'a, 'ctx, A> {
|
pub struct MatcherBuilder<'a, A> {
|
||||||
matching_words: MatchingWords<'ctx>,
|
matching_words: MatchingWords,
|
||||||
tokenizer: Tokenizer<'a, 'a, A>,
|
tokenizer: Tokenizer<'a, 'a, A>,
|
||||||
crop_marker: Option<String>,
|
crop_marker: Option<String>,
|
||||||
highlight_prefix: Option<String>,
|
highlight_prefix: Option<String>,
|
||||||
highlight_suffix: Option<String>,
|
highlight_suffix: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 'ctx, A> MatcherBuilder<'a, 'ctx, A> {
|
impl<'a, A> MatcherBuilder<'a, A> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
ctx: &'ctx SearchContext,
|
ctx: SearchContext,
|
||||||
located_terms: Vec<LocatedQueryTerm>,
|
located_terms: Vec<LocatedQueryTerm>,
|
||||||
tokenizer: Tokenizer<'a, 'a, A>,
|
tokenizer: Tokenizer<'a, 'a, A>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
@ -112,7 +112,7 @@ pub struct MatchBounds {
|
|||||||
/// and format the source string, returning a highlighted and cropped sub-string.
|
/// and format the source string, returning a highlighted and cropped sub-string.
|
||||||
pub struct Matcher<'t, 'm, A> {
|
pub struct Matcher<'t, 'm, A> {
|
||||||
text: &'t str,
|
text: &'t str,
|
||||||
matching_words: &'m MatchingWords<'m>,
|
matching_words: &'m MatchingWords,
|
||||||
tokenizer: &'m Tokenizer<'m, 'm, A>,
|
tokenizer: &'m Tokenizer<'m, 'm, A>,
|
||||||
crop_marker: &'m str,
|
crop_marker: &'m str,
|
||||||
highlight_prefix: &'m str,
|
highlight_prefix: &'m str,
|
||||||
@ -509,11 +509,11 @@ mod tests {
|
|||||||
use super::super::located_query_terms_from_string;
|
use super::super::located_query_terms_from_string;
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
impl<'a, 'ctx> MatcherBuilder<'a, 'ctx, &[u8]> {
|
impl<'a, 'ctx> MatcherBuilder<'a, &[u8]> {
|
||||||
pub fn new_test(ctx: &'ctx mut SearchContext, query: &'a str) -> Self {
|
pub fn new_test(mut ctx: SearchContext, query: &'a str) -> Self {
|
||||||
let tokenizer = TokenizerBuilder::new().build();
|
let tokenizer = TokenizerBuilder::new().build();
|
||||||
let tokens = tokenizer.tokenize(query);
|
let tokens = tokenizer.tokenize(query);
|
||||||
let query_terms = located_query_terms_from_string(ctx, tokens, None).unwrap();
|
let query_terms = located_query_terms_from_string(&mut ctx, tokens, None).unwrap();
|
||||||
Self::new(ctx, query_terms, TokenizerBuilder::new().build())
|
Self::new(ctx, query_terms, TokenizerBuilder::new().build())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -522,8 +522,8 @@ mod tests {
|
|||||||
fn format_identity() {
|
fn format_identity() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let mut ctx = SearchContext::new(&temp_index, &rtxn);
|
let ctx = SearchContext::new(&temp_index, &rtxn);
|
||||||
let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
|
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
||||||
|
|
||||||
let format_options = FormatOptions { highlight: false, crop: None };
|
let format_options = FormatOptions { highlight: false, crop: None };
|
||||||
|
|
||||||
@ -550,8 +550,8 @@ mod tests {
|
|||||||
fn format_highlight() {
|
fn format_highlight() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let mut ctx = SearchContext::new(&temp_index, &rtxn);
|
let ctx = SearchContext::new(&temp_index, &rtxn);
|
||||||
let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
|
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
||||||
|
|
||||||
let format_options = FormatOptions { highlight: true, crop: None };
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
|
|
||||||
@ -594,8 +594,8 @@ mod tests {
|
|||||||
fn highlight_unicode() {
|
fn highlight_unicode() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let mut ctx = SearchContext::new(&temp_index, &rtxn);
|
let ctx = SearchContext::new(&temp_index, &rtxn);
|
||||||
let builder = MatcherBuilder::new_test(&mut ctx, "world");
|
let builder = MatcherBuilder::new_test(ctx, "world");
|
||||||
let format_options = FormatOptions { highlight: true, crop: None };
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
|
|
||||||
// Text containing prefix match.
|
// Text containing prefix match.
|
||||||
@ -616,7 +616,8 @@ mod tests {
|
|||||||
@"<em>Ŵôřlḑ</em>"
|
@"<em>Ŵôřlḑ</em>"
|
||||||
);
|
);
|
||||||
|
|
||||||
let builder = MatcherBuilder::new_test(&mut ctx, "westfali");
|
let ctx = SearchContext::new(&temp_index, &rtxn);
|
||||||
|
let builder = MatcherBuilder::new_test(ctx, "westfali");
|
||||||
let format_options = FormatOptions { highlight: true, crop: None };
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
|
|
||||||
// Text containing unicode match.
|
// Text containing unicode match.
|
||||||
@ -633,8 +634,8 @@ mod tests {
|
|||||||
fn format_crop() {
|
fn format_crop() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let mut ctx = SearchContext::new(&temp_index, &rtxn);
|
let ctx = SearchContext::new(&temp_index, &rtxn);
|
||||||
let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
|
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
||||||
|
|
||||||
let format_options = FormatOptions { highlight: false, crop: Some(10) };
|
let format_options = FormatOptions { highlight: false, crop: Some(10) };
|
||||||
|
|
||||||
@ -731,8 +732,8 @@ mod tests {
|
|||||||
fn format_highlight_crop() {
|
fn format_highlight_crop() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let mut ctx = SearchContext::new(&temp_index, &rtxn);
|
let ctx = SearchContext::new(&temp_index, &rtxn);
|
||||||
let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
|
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
||||||
|
|
||||||
let format_options = FormatOptions { highlight: true, crop: Some(10) };
|
let format_options = FormatOptions { highlight: true, crop: Some(10) };
|
||||||
|
|
||||||
@ -794,8 +795,8 @@ mod tests {
|
|||||||
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let mut ctx = SearchContext::new(&temp_index, &rtxn);
|
let ctx = SearchContext::new(&temp_index, &rtxn);
|
||||||
let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
|
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
||||||
|
|
||||||
let text = "void void split the world void void.";
|
let text = "void void split the world void void.";
|
||||||
|
|
||||||
@ -831,8 +832,8 @@ mod tests {
|
|||||||
fn partial_matches() {
|
fn partial_matches() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let mut ctx = SearchContext::new(&temp_index, &rtxn);
|
let ctx = SearchContext::new(&temp_index, &rtxn);
|
||||||
let mut builder = MatcherBuilder::new_test(&mut ctx, "the \"t he\" door \"do or\"");
|
let mut builder = MatcherBuilder::new_test(ctx, "the \"t he\" door \"do or\"");
|
||||||
builder.highlight_prefix("_".to_string());
|
builder.highlight_prefix("_".to_string());
|
||||||
builder.highlight_suffix("_".to_string());
|
builder.highlight_suffix("_".to_string());
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user