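//! Definition of the query terms manipulated by the search pipeline.
//!
//! A [`QueryTerm`] groups every derivation of one term of the user's query by the
//! number of typos it contains (`ZeroTypoTerm`, `OneTypoTerm`, `TwoTypoTerm`), the
//! latter two being computed lazily. A [`QueryTermSubset`] is a lightweight view
//! over an interned [`QueryTerm`] that selects which of those derivations are still
//! allowed, and [`LocatedQueryTerm`] / [`LocatedQueryTermSubset`] attach a term to
//! its position in the query.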
mod compute_derivations;
mod ntypo_subset;
mod parse_query;
mod phrase;

use std::collections::BTreeSet;
use std::iter::FromIterator;
use std::ops::RangeInclusive;

use compute_derivations::partially_initialized_term_from_word;
use either::Either;
pub use ntypo_subset::NTypoTermSubset;
pub use parse_query::{located_query_terms_from_tokens, make_ngram, number_of_typos_allowed};
pub use phrase::Phrase;

use super::interner::{DedupInterner, Interned};
use super::{limits, SearchContext, Word};
use crate::Result;

/// A set of word derivations attached to a location in the search query.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct LocatedQueryTermSubset {
    pub term_subset: QueryTermSubset,
    pub positions: RangeInclusive<u16>,
    pub term_ids: RangeInclusive<u8>,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct QueryTermSubset {
    original: Interned<QueryTerm>,
    zero_typo_subset: NTypoTermSubset,
    one_typo_subset: NTypoTermSubset,
    two_typo_subset: NTypoTermSubset,
    /// `true` if the term cannot be deleted through the term matching strategy.
    ///
    /// Note that there are other reasons why a term cannot be deleted, such as
    /// being a phrase. In that case, this field could be set to `false`, but the
    /// term still wouldn't be deletable by the term matching strategy.
    mandatory: bool,
}

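/// A term of the user's query together with all of its derivations, grouped by the
/// number of typos they contain. The one- and two-typo buckets are computed lazily.
///
/// Illustrative sketch with hypothetical values: for the query word `beautiful`,
/// `zero_typo.exact` would typically hold `beautiful` itself, `one_typo` might
/// contain `beautifull`, `two_typos` might contain `beatifull`, and `split_words`
/// could hold the phrase `beauti ful`.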
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct QueryTerm {
    original: Interned<String>,
    ngram_words: Option<Vec<Interned<String>>>,
    max_levenshtein_distance: u8,
    is_prefix: bool,
    zero_typo: ZeroTypoTerm,
    // May not be computed yet
    one_typo: Lazy<OneTypoTerm>,
    // May not be computed yet
    two_typo: Lazy<TwoTypoTerm>,
}

// SubTerms will be in a dedup interner
#[derive(Default, Clone, PartialEq, Eq, Hash)]
struct ZeroTypoTerm {
    /// The original phrase, if any
    phrase: Option<Interned<Phrase>>,
    /// A single word equivalent to the original term, with zero typos
    exact: Option<Interned<String>>,
    /// All the words that contain the original word as a prefix
    prefix_of: BTreeSet<Interned<String>>,
    /// All the synonyms of the original word or phrase
    synonyms: BTreeSet<Interned<Phrase>>,
    /// A prefix in the prefix databases matching the original word
    use_prefix_db: Option<Interned<String>>,
}
#[derive(Default, Clone, PartialEq, Eq, Hash)]
struct OneTypoTerm {
    /// The original word split into multiple consecutive words
    split_words: Option<Interned<Phrase>>,
    /// Words that are 1 typo away from the original word
    one_typo: BTreeSet<Interned<String>>,
}
#[derive(Default, Clone, PartialEq, Eq, Hash)]
struct TwoTypoTerm {
    /// Words that are 2 typos away from the original word
    two_typos: BTreeSet<Interned<String>>,
}

#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lazy<T> {
    Uninit,
    Init(T),
}
impl<T> Lazy<T> {
    pub fn is_init(&self) -> bool {
        match self {
            Lazy::Uninit => false,
            Lazy::Init(_) => true,
        }
    }
    pub fn is_uninit(&self) -> bool {
        match self {
            Lazy::Uninit => true,
            Lazy::Init(_) => false,
        }
    }
}
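// A minimal sketch of how `Lazy` is used here: the one- and two-typo fields of
// `QueryTerm` are expected to start as `Lazy::Uninit` and to be filled in with
// `Lazy::Init(..)` by the derivation-computing code (e.g. `compute_fully_if_needed`).
// The test below only exercises the two state-query methods defined above.
#[cfg(test)]
mod lazy_state_tests {
    use super::Lazy;

    #[test]
    fn lazy_reports_its_state() {
        let uninit: Lazy<u32> = Lazy::Uninit;
        assert!(uninit.is_uninit());
        assert!(!uninit.is_init());

        let init = Lazy::Init(42u32);
        assert!(init.is_init());
        assert!(!init.is_uninit());
    }
}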

#[derive(Clone, Copy)]
pub enum ExactTerm {
    Phrase(Interned<Phrase>),
    Word(Interned<String>),
}

impl ExactTerm {
    pub fn interned_words<'ctx>(
        &self,
        ctx: &'ctx SearchContext<'ctx>,
    ) -> impl Iterator<Item = Option<Interned<String>>> + 'ctx {
        match *self {
            ExactTerm::Phrase(phrase) => {
                let phrase = ctx.phrase_interner.get(phrase);
                Either::Left(phrase.words.iter().copied())
            }
            ExactTerm::Word(word) => Either::Right(std::iter::once(Some(word))),
        }
    }
}

impl QueryTermSubset {
    pub fn is_mandatory(&self) -> bool {
        self.mandatory
    }
    pub fn make_mandatory(&mut self) {
        self.mandatory = true;
    }
    pub fn exact_term(&self, ctx: &SearchContext) -> Option<ExactTerm> {
        let full_query_term = ctx.term_interner.get(self.original);
        if full_query_term.ngram_words.is_some() {
            return None;
        }
        if let Some(phrase) = full_query_term.zero_typo.phrase {
            self.zero_typo_subset.contains_phrase(phrase).then_some(ExactTerm::Phrase(phrase))
        } else if let Some(word) = full_query_term.zero_typo.exact {
            self.zero_typo_subset.contains_word(word).then_some(ExactTerm::Word(word))
        } else {
            None
        }
    }

    pub fn empty(for_term: Interned<QueryTerm>) -> Self {
        Self {
            original: for_term,
            zero_typo_subset: NTypoTermSubset::Nothing,
            one_typo_subset: NTypoTermSubset::Nothing,
            two_typo_subset: NTypoTermSubset::Nothing,
            mandatory: false,
        }
    }
    pub fn full(for_term: Interned<QueryTerm>) -> Self {
        Self {
            original: for_term,
            zero_typo_subset: NTypoTermSubset::All,
            one_typo_subset: NTypoTermSubset::All,
            two_typo_subset: NTypoTermSubset::All,
            mandatory: false,
        }
    }

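    // Illustrative sketch (an assumption, not from the original source) of how the
    // two set operations below behave for two subsets of the same interned `term`:
    // `union` keeps a derivation if either side allows it, `intersect` only if both do.
    //
    //     let mut subset = QueryTermSubset::empty(term);
    //     subset.union(&QueryTermSubset::full(term));      // now allows every derivation
    //     subset.intersect(&QueryTermSubset::empty(term)); // back to allowing none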
    pub fn union(&mut self, other: &Self) {
        assert!(self.original == other.original);
        self.zero_typo_subset.union(&other.zero_typo_subset);
        self.one_typo_subset.union(&other.one_typo_subset);
        self.two_typo_subset.union(&other.two_typo_subset);
    }
    pub fn intersect(&mut self, other: &Self) {
        assert!(self.original == other.original);
        self.zero_typo_subset.intersect(&other.zero_typo_subset);
        self.one_typo_subset.intersect(&other.one_typo_subset);
        self.two_typo_subset.intersect(&other.two_typo_subset);
    }

    pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
        let original = ctx.term_interner.get(self.original);
        let Some(use_prefix_db) = original.zero_typo.use_prefix_db else { return None };
        let word = match &self.zero_typo_subset {
            NTypoTermSubset::All => Some(use_prefix_db),
            NTypoTermSubset::Subset { words, phrases: _ } => {
                if words.contains(&use_prefix_db) {
                    Some(use_prefix_db)
                } else {
                    None
                }
            }
            NTypoTermSubset::Nothing => None,
        };
        word.map(|word| {
            if original.ngram_words.is_some() {
                Word::Derived(word)
            } else {
                Word::Original(word)
            }
        })
    }
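    /// Return all the single-word derivations allowed by this subset, across the
    /// zero, one, and two typo buckets, excluding the entry that lives in the
    /// prefix databases. Computes the one/two typo derivations first if needed.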
    pub fn all_single_words_except_prefix_db(
        &self,
        ctx: &mut SearchContext,
    ) -> Result<BTreeSet<Word>> {
        let mut result = BTreeSet::default();
        if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() {
            self.original.compute_fully_if_needed(ctx)?;
        }

        let original = ctx.term_interner.get_mut(self.original);
        match &self.zero_typo_subset {
            NTypoTermSubset::All => {
                let ZeroTypoTerm {
                    phrase: _,
                    exact: zero_typo,
                    prefix_of,
                    synonyms: _,
                    use_prefix_db: _,
                } = &original.zero_typo;
                result.extend(zero_typo.iter().copied().map(|w| {
                    if original.ngram_words.is_some() {
                        Word::Derived(w)
                    } else {
                        Word::Original(w)
                    }
                }));
                result.extend(prefix_of.iter().copied().map(|w| {
                    if original.ngram_words.is_some() {
                        Word::Derived(w)
                    } else {
                        Word::Original(w)
                    }
                }));
            }
            NTypoTermSubset::Subset { words, phrases: _ } => {
                let ZeroTypoTerm {
                    phrase: _,
                    exact: zero_typo,
                    prefix_of,
                    synonyms: _,
                    use_prefix_db: _,
                } = &original.zero_typo;
                if let Some(zero_typo) = zero_typo {
                    if words.contains(zero_typo) {
                        if original.ngram_words.is_some() {
                            result.insert(Word::Derived(*zero_typo));
                        } else {
                            result.insert(Word::Original(*zero_typo));
                        }
                    }
                }
                result.extend(prefix_of.intersection(words).copied().map(|w| {
                    if original.ngram_words.is_some() {
                        Word::Derived(w)
                    } else {
                        Word::Original(w)
                    }
                }));
            }
            NTypoTermSubset::Nothing => {}
        }

        match &self.one_typo_subset {
            NTypoTermSubset::All => {
                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo
                else {
                    panic!()
                };
                result.extend(one_typo.iter().copied().map(Word::Derived))
            }
            NTypoTermSubset::Subset { words, phrases: _ } => {
                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo
                else {
                    panic!()
                };
                result.extend(one_typo.intersection(words).copied().map(Word::Derived));
            }
            NTypoTermSubset::Nothing => {}
        };

        match &self.two_typo_subset {
            NTypoTermSubset::All => {
                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() };
                result.extend(two_typos.iter().copied().map(Word::Derived));
            }
            NTypoTermSubset::Subset { words, phrases: _ } => {
                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() };
                result.extend(two_typos.intersection(words).copied().map(Word::Derived));
            }
            NTypoTermSubset::Nothing => {}
        };

        Ok(result)
    }
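    /// Return all the phrase derivations allowed by this subset: the original
    /// phrase, the synonym phrases, and (if allowed) the split-words phrase.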
    pub fn all_phrases(&self, ctx: &mut SearchContext) -> Result<BTreeSet<Interned<Phrase>>> {
        let mut result = BTreeSet::default();

        if !self.one_typo_subset.is_empty() {
            self.original.compute_fully_if_needed(ctx)?;
        }
        let original = ctx.term_interner.get_mut(self.original);

        let ZeroTypoTerm { phrase, exact: _, prefix_of: _, synonyms, use_prefix_db: _ } =
            &original.zero_typo;
        result.extend(phrase.iter().copied());
        result.extend(synonyms.iter().copied());

        match &self.one_typo_subset {
            NTypoTermSubset::All => {
                let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo
                else {
                    panic!();
                };
                result.extend(split_words.iter().copied());
            }
            NTypoTermSubset::Subset { phrases, .. } => {
                let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo
                else {
                    panic!();
                };
                if let Some(split_words) = split_words {
                    if phrases.contains(split_words) {
                        result.insert(*split_words);
                    }
                }
            }
            NTypoTermSubset::Nothing => {}
        }

        Ok(result)
    }

    pub fn original_phrase(&self, ctx: &SearchContext) -> Option<Interned<Phrase>> {
        let t = ctx.term_interner.get(self.original);
        if let Some(p) = t.zero_typo.phrase {
            if self.zero_typo_subset.contains_phrase(p) {
                return Some(p);
            }
        }
        None
    }
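    /// Return the maximum number of typos this subset can still cost: the term's
    /// `max_levenshtein_distance`, capped by which of the typo subsets are still
    /// non-empty. A term whose only "typo" is a split-words derivation costs 1.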
    pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 {
        let t = ctx.term_interner.get(self.original);
        match t.max_levenshtein_distance {
            0 => {
                if t.allows_split_words() {
                    1
                } else {
                    0
                }
            }
            1 => {
                if self.one_typo_subset.is_empty() {
                    0
                } else {
                    1
                }
            }
            2 => {
                if self.two_typo_subset.is_empty() {
                    if self.one_typo_subset.is_empty() {
                        0
                    } else {
                        1
                    }
                } else {
                    2
                }
            }
            _ => panic!(),
        }
    }
    pub fn keep_only_exact_term(&mut self, ctx: &SearchContext) {
        if let Some(term) = self.exact_term(ctx) {
            match term {
                ExactTerm::Phrase(p) => {
                    self.zero_typo_subset = NTypoTermSubset::Subset {
                        words: BTreeSet::new(),
                        phrases: BTreeSet::from_iter([p]),
                    };
                    self.clear_one_typo_subset();
                    self.clear_two_typo_subset();
                }
                ExactTerm::Word(w) => {
                    self.zero_typo_subset = NTypoTermSubset::Subset {
                        words: BTreeSet::from_iter([w]),
                        phrases: BTreeSet::new(),
                    };
                    self.clear_one_typo_subset();
                    self.clear_two_typo_subset();
                }
            }
        }
    }
    pub fn clear_zero_typo_subset(&mut self) {
        self.zero_typo_subset = NTypoTermSubset::Nothing;
    }
    pub fn clear_one_typo_subset(&mut self) {
        self.one_typo_subset = NTypoTermSubset::Nothing;
    }
    pub fn clear_two_typo_subset(&mut self) {
        self.two_typo_subset = NTypoTermSubset::Nothing;
    }
    pub fn description(&self, ctx: &SearchContext) -> String {
        let t = ctx.term_interner.get(self.original);
        ctx.word_interner.get(t.original).to_owned()
    }
}

impl ZeroTypoTerm {
    fn is_empty(&self) -> bool {
        let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, use_prefix_db } = self;
        phrase.is_none()
            && zero_typo.is_none()
            && prefix_of.is_empty()
            && synonyms.is_empty()
            && use_prefix_db.is_none()
    }
}
impl OneTypoTerm {
    fn is_empty(&self) -> bool {
        let OneTypoTerm { split_words, one_typo } = self;
        one_typo.is_empty() && split_words.is_none()
    }
}
impl TwoTypoTerm {
    fn is_empty(&self) -> bool {
        let TwoTypoTerm { two_typos } = self;
        two_typos.is_empty()
    }
}

impl QueryTerm {
    fn is_empty(&self) -> bool {
        let Lazy::Init(one_typo) = &self.one_typo else {
            return false;
        };
        let Lazy::Init(two_typo) = &self.two_typo else {
            return false;
        };

        self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty()
    }
    fn allows_split_words(&self) -> bool {
        self.zero_typo.phrase.is_none()
    }
}

impl Interned<QueryTerm> {
    /// Return the original word of this query term, or `None` if the term is an ngram
    fn original_single_word(self, ctx: &SearchContext) -> Option<Interned<String>> {
        let self_ = ctx.term_interner.get(self);
        if self_.ngram_words.is_some() {
            None
        } else {
            Some(self_.original)
        }
    }
}

/// A query term coupled with its position in the user's search query.
#[derive(Clone)]
pub struct LocatedQueryTerm {
    pub value: Interned<QueryTerm>,
    pub positions: RangeInclusive<u16>,
}

impl LocatedQueryTerm {
    /// Return `true` iff the term is empty
    pub fn is_empty(&self, interner: &DedupInterner<QueryTerm>) -> bool {
        interner.get(self.value).is_empty()
    }
}

impl QueryTerm {
    pub fn is_cached_prefix(&self) -> bool {
        self.zero_typo.use_prefix_db.is_some()
    }
    pub fn is_prefix(&self) -> bool {
        self.is_prefix
    }
    pub fn original_word(&self, ctx: &SearchContext) -> String {
        ctx.word_interner.get(self.original).clone()
    }
    pub fn all_computed_derivations(&self) -> (Vec<Interned<String>>, Vec<Interned<Phrase>>) {
        let mut words = BTreeSet::new();
        let mut phrases = BTreeSet::new();

        let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, use_prefix_db: _ } =
            &self.zero_typo;
        words.extend(zero_typo.iter().copied());
        words.extend(prefix_of.iter().copied());
        phrases.extend(phrase.iter().copied());
        phrases.extend(synonyms.iter().copied());

        if let Lazy::Init(OneTypoTerm { split_words, one_typo }) = &self.one_typo {
            words.extend(one_typo.iter().copied());
            phrases.extend(split_words.iter().copied());
        };

        if let Lazy::Init(TwoTypoTerm { two_typos }) = &self.two_typo {
            words.extend(two_typos.iter().copied());
        };

        (words.into_iter().collect(), phrases.into_iter().collect())
    }
}