Make the matcher consume the search context

ManyTheFish 2023-04-06 12:28:28 +02:00
parent 13b7c826c1
commit ebe23b04c9
2 changed files with 35 additions and 34 deletions
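In short, `MatchingWords` and `MatcherBuilder` now take the `SearchContext` by value and own the interners instead of borrowing them, which removes the `'ctx` lifetime parameter from both types. The sketch below is a minimal, self-contained illustration of that ownership pattern; the names (`Interner`, `Context`, `BorrowingMatcher`, `OwningMatcher`) are simplified placeholders, not the actual milli definitions.

// Minimal sketch of the "consume the context instead of borrowing it" pattern
// (placeholder types, not the real milli definitions).

// Stand-in for the interners stored in the search context.
#[derive(Default)]
struct Interner {
    strings: Vec<String>,
}

// Stand-in for `SearchContext`.
#[derive(Default)]
struct Context {
    word_interner: Interner,
}

// Before: the matcher borrowed from the context, so it carried a `'ctx` lifetime.
#[allow(dead_code)]
struct BorrowingMatcher<'ctx> {
    word_interner: &'ctx Interner,
}

// After: the matcher consumes the context and owns the interner outright,
// so the lifetime parameter disappears from the type.
struct OwningMatcher {
    word_interner: Interner,
}

impl OwningMatcher {
    fn new(ctx: Context) -> Self {
        // `ctx` is passed by value, so its field can simply be moved out.
        Self { word_interner: ctx.word_interner }
    }
}

fn main() {
    let ctx = Context::default();
    let matcher = OwningMatcher::new(ctx); // the context is consumed here
    println!("owned strings: {}", matcher.word_interner.strings.len());
}

The trade-off is that nothing else can use the `SearchContext` once the matcher is built, which is why the tests below now construct a fresh context for each `MatcherBuilder::new_test` call instead of reusing a `&mut ctx`.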

View File

@@ -23,9 +23,9 @@ pub struct LocatedMatchingWords {
/// Structure created from a query tree
/// referencing words that match the given query tree.
-pub struct MatchingWords<'ctx> {
-word_interner: &'ctx DedupInterner<String>,
-phrase_interner: &'ctx DedupInterner<Phrase>,
+pub struct MatchingWords {
+word_interner: DedupInterner<String>,
+phrase_interner: DedupInterner<Phrase>,
phrases: Vec<LocatedMatchingPhrase>,
words: Vec<LocatedMatchingWords>,
}
@@ -82,8 +82,8 @@ fn extract_matching_terms(term: &QueryTerm) -> (Vec<Interned<Phrase>>, Vec<Inter
(matching_phrases, matching_words)
}
-impl<'ctx> MatchingWords<'ctx> {
-pub fn new(ctx: &'ctx SearchContext, located_terms: Vec<LocatedQueryTerm>) -> Self {
+impl MatchingWords {
+pub fn new(ctx: SearchContext, located_terms: Vec<LocatedQueryTerm>) -> Self {
let mut phrases = Vec::new();
let mut words = Vec::new();
@@ -112,18 +112,18 @@ impl<'ctx> MatchingWords<'ctx> {
Self {
phrases,
words,
-word_interner: &ctx.word_interner,
-phrase_interner: &ctx.phrase_interner,
+word_interner: ctx.word_interner,
+phrase_interner: ctx.phrase_interner,
}
}
/// Returns an iterator over terms that match or partially match the given token.
-pub fn match_token<'b>(&'ctx self, token: &'b Token<'b>) -> MatchesIter<'ctx, 'b> {
+pub fn match_token<'a, 'b>(&'a self, token: &'b Token<'b>) -> MatchesIter<'a, 'b> {
MatchesIter { matching_words: self, phrases: Box::new(self.phrases.iter()), token }
}
/// Try to match the token with one of the located_words.
-fn match_unique_words(&'ctx self, token: &Token) -> Option<MatchType<'ctx>> {
+fn match_unique_words<'a>(&'a self, token: &Token) -> Option<MatchType<'a>> {
for located_words in &self.words {
for word in &located_words.value {
let word = self.word_interner.get(*word);
@@ -148,7 +148,7 @@ impl<'ctx> MatchingWords<'ctx> {
/// Iterator over terms that match the given token;
/// this allows matches to be evaluated lazily.
pub struct MatchesIter<'a, 'b> {
-matching_words: &'a MatchingWords<'a>,
+matching_words: &'a MatchingWords,
phrases: Box<dyn Iterator<Item = &'a LocatedMatchingPhrase> + 'a>,
token: &'b Token<'b>,
}
@@ -268,7 +268,7 @@ pub(crate) mod tests {
let tokenizer = TokenizerBuilder::new().build();
let tokens = tokenizer.tokenize("split this world");
let query_terms = located_query_terms_from_string(&mut ctx, tokens, None).unwrap();
-let matching_words = MatchingWords::new(&ctx, query_terms);
+let matching_words = MatchingWords::new(ctx, query_terms);
assert_eq!(
matching_words

View File

@@ -14,17 +14,17 @@ const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";
/// Structure used to build a Matcher, allowing formatting tags to be customized.
-pub struct MatcherBuilder<'a, 'ctx, A> {
-matching_words: MatchingWords<'ctx>,
+pub struct MatcherBuilder<'a, A> {
+matching_words: MatchingWords,
tokenizer: Tokenizer<'a, 'a, A>,
crop_marker: Option<String>,
highlight_prefix: Option<String>,
highlight_suffix: Option<String>,
}
-impl<'a, 'ctx, A> MatcherBuilder<'a, 'ctx, A> {
+impl<'a, A> MatcherBuilder<'a, A> {
pub fn new(
-ctx: &'ctx SearchContext,
+ctx: SearchContext,
located_terms: Vec<LocatedQueryTerm>,
tokenizer: Tokenizer<'a, 'a, A>,
) -> Self {
@@ -112,7 +112,7 @@ pub struct MatchBounds {
/// and format the source string, returning a highlighted and cropped sub-string.
pub struct Matcher<'t, 'm, A> {
text: &'t str,
-matching_words: &'m MatchingWords<'m>,
+matching_words: &'m MatchingWords,
tokenizer: &'m Tokenizer<'m, 'm, A>,
crop_marker: &'m str,
highlight_prefix: &'m str,
@@ -509,11 +509,11 @@ mod tests {
use super::super::located_query_terms_from_string;
use super::*;
-impl<'a, 'ctx> MatcherBuilder<'a, 'ctx, &[u8]> {
-pub fn new_test(ctx: &'ctx mut SearchContext, query: &'a str) -> Self {
+impl<'a> MatcherBuilder<'a, &[u8]> {
+pub fn new_test(mut ctx: SearchContext, query: &'a str) -> Self {
let tokenizer = TokenizerBuilder::new().build();
let tokens = tokenizer.tokenize(query);
-let query_terms = located_query_terms_from_string(ctx, tokens, None).unwrap();
+let query_terms = located_query_terms_from_string(&mut ctx, tokens, None).unwrap();
Self::new(ctx, query_terms, TokenizerBuilder::new().build())
}
}
@@ -522,8 +522,8 @@ mod tests {
fn format_identity() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
-let mut ctx = SearchContext::new(&temp_index, &rtxn);
-let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
+let ctx = SearchContext::new(&temp_index, &rtxn);
+let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: false, crop: None };
@@ -550,8 +550,8 @@ mod tests {
fn format_highlight() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
-let mut ctx = SearchContext::new(&temp_index, &rtxn);
-let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
+let ctx = SearchContext::new(&temp_index, &rtxn);
+let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: true, crop: None };
@@ -594,8 +594,8 @@ mod tests {
fn highlight_unicode() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
-let mut ctx = SearchContext::new(&temp_index, &rtxn);
-let builder = MatcherBuilder::new_test(&mut ctx, "world");
+let ctx = SearchContext::new(&temp_index, &rtxn);
+let builder = MatcherBuilder::new_test(ctx, "world");
let format_options = FormatOptions { highlight: true, crop: None };
// Text containing prefix match.
@@ -616,7 +616,8 @@
@"<em>Ŵôřlḑ</em>"
);
-let builder = MatcherBuilder::new_test(&mut ctx, "westfali");
+let ctx = SearchContext::new(&temp_index, &rtxn);
+let builder = MatcherBuilder::new_test(ctx, "westfali");
let format_options = FormatOptions { highlight: true, crop: None };
// Text containing unicode match.
@@ -633,8 +634,8 @@ mod tests {
fn format_crop() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
-let mut ctx = SearchContext::new(&temp_index, &rtxn);
-let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
+let ctx = SearchContext::new(&temp_index, &rtxn);
+let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: false, crop: Some(10) };
@@ -731,8 +732,8 @@ mod tests {
fn format_highlight_crop() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
-let mut ctx = SearchContext::new(&temp_index, &rtxn);
-let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
+let ctx = SearchContext::new(&temp_index, &rtxn);
+let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: true, crop: Some(10) };
@@ -794,8 +795,8 @@ mod tests {
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
-let mut ctx = SearchContext::new(&temp_index, &rtxn);
-let builder = MatcherBuilder::new_test(&mut ctx, "split the world");
+let ctx = SearchContext::new(&temp_index, &rtxn);
+let builder = MatcherBuilder::new_test(ctx, "split the world");
let text = "void void split the world void void.";
@@ -831,8 +832,8 @@ mod tests {
fn partial_matches() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
-let mut ctx = SearchContext::new(&temp_index, &rtxn);
-let mut builder = MatcherBuilder::new_test(&mut ctx, "the \"t he\" door \"do or\"");
+let ctx = SearchContext::new(&temp_index, &rtxn);
+let mut builder = MatcherBuilder::new_test(ctx, "the \"t he\" door \"do or\"");
builder.highlight_prefix("_".to_string());
builder.highlight_suffix("_".to_string());