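//! Definition of the query terms manipulated by the search pipeline.
//!
//! A [`QueryTerm`] groups every derivation of one term of the user's query by the
//! number of typos it contains (`ZeroTypoTerm`, `OneTypoTerm`, `TwoTypoTerm`), the
//! latter two being computed lazily. A [`QueryTermSubset`] is a lightweight view
//! over an interned [`QueryTerm`] that selects which of those derivations are still
//! allowed, and [`LocatedQueryTerm`] / [`LocatedQueryTermSubset`] attach a term to
//! its position in the query.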
mod compute_derivations;
mod ntypo_subset;
mod parse_query;
mod phrase;

use std::collections::BTreeSet;
use std::iter::FromIterator;
use std::ops::RangeInclusive;

use compute_derivations::partially_initialized_term_from_word;
use either::Either;
pub use ntypo_subset::NTypoTermSubset;
pub use parse_query::{located_query_terms_from_tokens, make_ngram, number_of_typos_allowed};
pub use phrase::Phrase;

use super::interner::{DedupInterner, Interned};
use super::{limits, SearchContext, Word};
use crate::Result;

/// A set of word derivations attached to a location in the search query.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct LocatedQueryTermSubset {
    pub term_subset: QueryTermSubset,
    pub positions: RangeInclusive<u16>,
    pub term_ids: RangeInclusive<u8>,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct QueryTermSubset {
    original: Interned<QueryTerm>,
    zero_typo_subset: NTypoTermSubset,
    one_typo_subset: NTypoTermSubset,
    two_typo_subset: NTypoTermSubset,
    /// `true` if the term cannot be deleted through the term matching strategy.
    ///
    /// Note that there are other reasons why a term cannot be deleted, such as
    /// being a phrase. In that case, this field could be set to `false`, but the
    /// term still wouldn't be deletable by the term matching strategy.
    mandatory: bool,
}

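/// A term of the user's query together with all of its derivations, grouped by the
/// number of typos they contain. The one- and two-typo buckets are computed lazily.
///
/// Illustrative sketch with hypothetical values: for the query word `beautiful`,
/// `zero_typo.exact` would typically hold `beautiful` itself, `one_typo` might
/// contain `beautifull`, `two_typos` might contain `beatifull`, and `split_words`
/// could hold the phrase `beauti ful`.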
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct QueryTerm {
    original: Interned<String>,
    ngram_words: Option<Vec<Interned<String>>>,
    max_levenshtein_distance: u8,
    is_prefix: bool,
    zero_typo: ZeroTypoTerm,
    // May not be computed yet
    one_typo: Lazy<OneTypoTerm>,
    // May not be computed yet
    two_typo: Lazy<TwoTypoTerm>,
}

// SubTerms will be in a dedup interner
#[derive(Default, Clone, PartialEq, Eq, Hash)]
struct ZeroTypoTerm {
    /// The original phrase, if any
    phrase: Option<Interned<Phrase>>,
    /// A single word equivalent to the original term, with zero typos
    exact: Option<Interned<String>>,
    /// All the words that contain the original word as a prefix
    prefix_of: BTreeSet<Interned<String>>,
    /// All the synonyms of the original word or phrase
    synonyms: BTreeSet<Interned<Phrase>>,
    /// A prefix in the prefix databases matching the original word
    use_prefix_db: Option<Interned<String>>,
}
#[derive(Default, Clone, PartialEq, Eq, Hash)]
struct OneTypoTerm {
    /// The original word split into multiple consecutive words
    split_words: Option<Interned<Phrase>>,
    /// Words that are 1 typo away from the original word
    one_typo: BTreeSet<Interned<String>>,
}
#[derive(Default, Clone, PartialEq, Eq, Hash)]
struct TwoTypoTerm {
    /// Words that are 2 typos away from the original word
    two_typos: BTreeSet<Interned<String>>,
}

#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lazy<T> {
    Uninit,
    Init(T),
}
impl<T> Lazy<T> {
    pub fn is_init(&self) -> bool {
        match self {
            Lazy::Uninit => false,
            Lazy::Init(_) => true,
        }
    }
    pub fn is_uninit(&self) -> bool {
        match self {
            Lazy::Uninit => true,
            Lazy::Init(_) => false,
        }
    }
}
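// A minimal sketch of how `Lazy` is used here: the one- and two-typo fields of
// `QueryTerm` are expected to start as `Lazy::Uninit` and to be filled in with
// `Lazy::Init(..)` by the derivation-computing code (e.g. `compute_fully_if_needed`).
// The test below only exercises the two state-query methods defined above.
#[cfg(test)]
mod lazy_state_tests {
    use super::Lazy;

    #[test]
    fn lazy_reports_its_state() {
        let uninit: Lazy<u32> = Lazy::Uninit;
        assert!(uninit.is_uninit());
        assert!(!uninit.is_init());

        let init = Lazy::Init(42u32);
        assert!(init.is_init());
        assert!(!init.is_uninit());
    }
}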

#[derive(Clone, Copy)]
pub enum ExactTerm {
    Phrase(Interned<Phrase>),
    Word(Interned<String>),
}

impl ExactTerm {
    pub fn interned_words<'ctx>(
        &self,
        ctx: &'ctx SearchContext<'ctx>,
    ) -> impl Iterator<Item = Option<Interned<String>>> + 'ctx {
        match *self {
            ExactTerm::Phrase(phrase) => {
                let phrase = ctx.phrase_interner.get(phrase);
                Either::Left(phrase.words.iter().copied())
            }
            ExactTerm::Word(word) => Either::Right(std::iter::once(Some(word))),
        }
    }
}

impl QueryTermSubset {
    pub fn is_mandatory(&self) -> bool {
        self.mandatory
    }
    pub fn make_mandatory(&mut self) {
        self.mandatory = true;
    }
    pub fn exact_term(&self, ctx: &SearchContext) -> Option<ExactTerm> {
        let full_query_term = ctx.term_interner.get(self.original);
        if full_query_term.ngram_words.is_some() {
            return None;
        }
        if let Some(phrase) = full_query_term.zero_typo.phrase {
            self.zero_typo_subset.contains_phrase(phrase).then_some(ExactTerm::Phrase(phrase))
        } else if let Some(word) = full_query_term.zero_typo.exact {
            self.zero_typo_subset.contains_word(word).then_some(ExactTerm::Word(word))
        } else {
            None
        }
    }

    pub fn empty(for_term: Interned<QueryTerm>) -> Self {
        Self {
            original: for_term,
            zero_typo_subset: NTypoTermSubset::Nothing,
            one_typo_subset: NTypoTermSubset::Nothing,
            two_typo_subset: NTypoTermSubset::Nothing,
            mandatory: false,
        }
    }
    pub fn full(for_term: Interned<QueryTerm>) -> Self {
        Self {
            original: for_term,
            zero_typo_subset: NTypoTermSubset::All,
            one_typo_subset: NTypoTermSubset::All,
            two_typo_subset: NTypoTermSubset::All,
            mandatory: false,
        }
    }

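    // Illustrative sketch (an assumption, not from the original source) of how the
    // two set operations below behave for two subsets of the same interned `term`:
    // `union` keeps a derivation if either side allows it, `intersect` only if both do.
    //
    //     let mut subset = QueryTermSubset::empty(term);
    //     subset.union(&QueryTermSubset::full(term));      // now allows every derivation
    //     subset.intersect(&QueryTermSubset::empty(term)); // back to allowing none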
    pub fn union(&mut self, other: &Self) {
        assert!(self.original == other.original);
        self.zero_typo_subset.union(&other.zero_typo_subset);
        self.one_typo_subset.union(&other.one_typo_subset);
        self.two_typo_subset.union(&other.two_typo_subset);
    }
    pub fn intersect(&mut self, other: &Self) {
        assert!(self.original == other.original);
        self.zero_typo_subset.intersect(&other.zero_typo_subset);
        self.one_typo_subset.intersect(&other.one_typo_subset);
        self.two_typo_subset.intersect(&other.two_typo_subset);
    }

    pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
        let original = ctx.term_interner.get(self.original);
        let Some(use_prefix_db) = original.zero_typo.use_prefix_db else { return None };
        let word = match &self.zero_typo_subset {
            NTypoTermSubset::All => Some(use_prefix_db),
            NTypoTermSubset::Subset { words, phrases: _ } => {
                if words.contains(&use_prefix_db) {
                    Some(use_prefix_db)
                } else {
                    None
                }
            }
            NTypoTermSubset::Nothing => None,
        };
        word.map(|word| {
            if original.ngram_words.is_some() {
                Word::Derived(word)
            } else {
                Word::Original(word)
            }
        })
    }
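    /// Return all the single-word derivations allowed by this subset, across the
    /// zero, one, and two typo buckets, excluding the entry that lives in the
    /// prefix databases. Computes the one/two typo derivations first if needed.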
    pub fn all_single_words_except_prefix_db(
        &self,
        ctx: &mut SearchContext,
    ) -> Result<BTreeSet<Word>> {
        let mut result = BTreeSet::default();
        if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() {
            self.original.compute_fully_if_needed(ctx)?;
        }

        let original = ctx.term_interner.get_mut(self.original);
        match &self.zero_typo_subset {
            NTypoTermSubset::All => {
                let ZeroTypoTerm {
                    phrase: _,
                    exact: zero_typo,
                    prefix_of,
                    synonyms: _,
                    use_prefix_db: _,
                } = &original.zero_typo;
                result.extend(zero_typo.iter().copied().map(|w| {
                    if original.ngram_words.is_some() {
                        Word::Derived(w)
                    } else {
                        Word::Original(w)
                    }
                }));
                result.extend(prefix_of.iter().copied().map(|w| {
                    if original.ngram_words.is_some() {
                        Word::Derived(w)
                    } else {
                        Word::Original(w)
                    }
                }));
            }
            NTypoTermSubset::Subset { words, phrases: _ } => {
                let ZeroTypoTerm {
                    phrase: _,
                    exact: zero_typo,
                    prefix_of,
                    synonyms: _,
                    use_prefix_db: _,
                } = &original.zero_typo;
                if let Some(zero_typo) = zero_typo {
                    if words.contains(zero_typo) {
                        if original.ngram_words.is_some() {
                            result.insert(Word::Derived(*zero_typo));
                        } else {
                            result.insert(Word::Original(*zero_typo));
                        }
                    }
                }
                result.extend(prefix_of.intersection(words).copied().map(|w| {
                    if original.ngram_words.is_some() {
                        Word::Derived(w)
                    } else {
                        Word::Original(w)
                    }
                }));
            }
            NTypoTermSubset::Nothing => {}
        }

        match &self.one_typo_subset {
            NTypoTermSubset::All => {
                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo
                else {
                    panic!()
                };
                result.extend(one_typo.iter().copied().map(Word::Derived))
            }
            NTypoTermSubset::Subset { words, phrases: _ } => {
                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo
                else {
                    panic!()
                };
                result.extend(one_typo.intersection(words).copied().map(Word::Derived));
            }
            NTypoTermSubset::Nothing => {}
        };

        match &self.two_typo_subset {
            NTypoTermSubset::All => {
                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() };
                result.extend(two_typos.iter().copied().map(Word::Derived));
            }
            NTypoTermSubset::Subset { words, phrases: _ } => {
                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() };
                result.extend(two_typos.intersection(words).copied().map(Word::Derived));
            }
            NTypoTermSubset::Nothing => {}
        };

        Ok(result)
    }
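    /// Return all the phrase derivations allowed by this subset: the original
    /// phrase, the synonym phrases, and (if allowed) the split-words phrase.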
    pub fn all_phrases(&self, ctx: &mut SearchContext) -> Result<BTreeSet<Interned<Phrase>>> {
        let mut result = BTreeSet::default();

        if !self.one_typo_subset.is_empty() {
            self.original.compute_fully_if_needed(ctx)?;
        }
        let original = ctx.term_interner.get_mut(self.original);

        let ZeroTypoTerm { phrase, exact: _, prefix_of: _, synonyms, use_prefix_db: _ } =
            &original.zero_typo;
        result.extend(phrase.iter().copied());
        result.extend(synonyms.iter().copied());

        match &self.one_typo_subset {
            NTypoTermSubset::All => {
                let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo
                else {
                    panic!();
                };
                result.extend(split_words.iter().copied());
            }
            NTypoTermSubset::Subset { phrases, .. } => {
                let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo
                else {
                    panic!();
                };
                if let Some(split_words) = split_words {
                    if phrases.contains(split_words) {
                        result.insert(*split_words);
                    }
                }
            }
            NTypoTermSubset::Nothing => {}
        }

        Ok(result)
    }

    pub fn original_phrase(&self, ctx: &SearchContext) -> Option<Interned<Phrase>> {
        let t = ctx.term_interner.get(self.original);
        if let Some(p) = t.zero_typo.phrase {
            if self.zero_typo_subset.contains_phrase(p) {
                return Some(p);
            }
        }
        None
    }
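    /// Return the maximum number of typos this subset can still cost: the term's
    /// `max_levenshtein_distance`, capped by which of the typo subsets are still
    /// non-empty. A term whose only "typo" is a split-words derivation costs 1.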
    pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 {
        let t = ctx.term_interner.get(self.original);
        match t.max_levenshtein_distance {
            0 => {
                if t.allows_split_words() {
                    1
                } else {
                    0
                }
            }
            1 => {
                if self.one_typo_subset.is_empty() {
                    0
                } else {
                    1
                }
            }
            2 => {
                if self.two_typo_subset.is_empty() {
                    if self.one_typo_subset.is_empty() {
                        0
                    } else {
                        1
                    }
                } else {
                    2
                }
            }
            _ => panic!(),
        }
    }
    pub fn keep_only_exact_term(&mut self, ctx: &SearchContext) {
        if let Some(term) = self.exact_term(ctx) {
            match term {
                ExactTerm::Phrase(p) => {
                    self.zero_typo_subset = NTypoTermSubset::Subset {
                        words: BTreeSet::new(),
                        phrases: BTreeSet::from_iter([p]),
                    };
                    self.clear_one_typo_subset();
                    self.clear_two_typo_subset();
                }
                ExactTerm::Word(w) => {
                    self.zero_typo_subset = NTypoTermSubset::Subset {
                        words: BTreeSet::from_iter([w]),
                        phrases: BTreeSet::new(),
                    };
                    self.clear_one_typo_subset();
                    self.clear_two_typo_subset();
                }
            }
        }
    }
    pub fn clear_zero_typo_subset(&mut self) {
        self.zero_typo_subset = NTypoTermSubset::Nothing;
    }
    pub fn clear_one_typo_subset(&mut self) {
        self.one_typo_subset = NTypoTermSubset::Nothing;
    }
    pub fn clear_two_typo_subset(&mut self) {
        self.two_typo_subset = NTypoTermSubset::Nothing;
    }
    pub fn description(&self, ctx: &SearchContext) -> String {
        let t = ctx.term_interner.get(self.original);
        ctx.word_interner.get(t.original).to_owned()
    }
}

impl ZeroTypoTerm {
    fn is_empty(&self) -> bool {
        let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, use_prefix_db } = self;
        phrase.is_none()
            && zero_typo.is_none()
            && prefix_of.is_empty()
            && synonyms.is_empty()
            && use_prefix_db.is_none()
    }
}
impl OneTypoTerm {
    fn is_empty(&self) -> bool {
        let OneTypoTerm { split_words, one_typo } = self;
        one_typo.is_empty() && split_words.is_none()
    }
}
impl TwoTypoTerm {
    fn is_empty(&self) -> bool {
        let TwoTypoTerm { two_typos } = self;
        two_typos.is_empty()
    }
}

impl QueryTerm {
    fn is_empty(&self) -> bool {
        let Lazy::Init(one_typo) = &self.one_typo else {
            return false;
        };
        let Lazy::Init(two_typo) = &self.two_typo else {
            return false;
        };

        self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty()
    }
    fn allows_split_words(&self) -> bool {
        self.zero_typo.phrase.is_none()
    }
}

impl Interned<QueryTerm> {
    /// Return the original word of this query term, or `None` if the term is an ngram
    fn original_single_word(self, ctx: &SearchContext) -> Option<Interned<String>> {
        let self_ = ctx.term_interner.get(self);
        if self_.ngram_words.is_some() {
            None
        } else {
            Some(self_.original)
        }
    }
}

/// A query term coupled with its position in the user's search query.
#[derive(Clone)]
pub struct LocatedQueryTerm {
    pub value: Interned<QueryTerm>,
    pub positions: RangeInclusive<u16>,
}

impl LocatedQueryTerm {
    /// Return `true` iff the term is empty
    pub fn is_empty(&self, interner: &DedupInterner<QueryTerm>) -> bool {
        interner.get(self.value).is_empty()
    }
}

impl QueryTerm {
    pub fn is_cached_prefix(&self) -> bool {
        self.zero_typo.use_prefix_db.is_some()
    }
    pub fn is_prefix(&self) -> bool {
        self.is_prefix
    }
    pub fn original_word(&self, ctx: &SearchContext) -> String {
        ctx.word_interner.get(self.original).clone()
    }
    pub fn all_computed_derivations(&self) -> (Vec<Interned<String>>, Vec<Interned<Phrase>>) {
        let mut words = BTreeSet::new();
        let mut phrases = BTreeSet::new();

        let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, use_prefix_db: _ } =
            &self.zero_typo;
        words.extend(zero_typo.iter().copied());
        words.extend(prefix_of.iter().copied());
        phrases.extend(phrase.iter().copied());
        phrases.extend(synonyms.iter().copied());

        if let Lazy::Init(OneTypoTerm { split_words, one_typo }) = &self.one_typo {
            words.extend(one_typo.iter().copied());
            phrases.extend(split_words.iter().copied());
        };

        if let Lazy::Init(TwoTypoTerm { two_typos }) = &self.two_typo {
            words.extend(two_typos.iter().copied());
        };

        (words.into_iter().collect(), phrases.into_iter().collect())
    }
}