Fix match count

mirror of https://github.com/meilisearch/meilisearch.git
commit 3bb1e35ada (parent 56e0edd621)
@@ -1,12 +1,12 @@
 use std::cmp::{min, Reverse};
-use std::collections::{BTreeMap, HashMap};
+use std::collections::BTreeMap;
+use std::fmt;
 use std::ops::{Index, IndexMut};
 
 use levenshtein_automata::{Distance, DFA};
 use meilisearch_tokenizer::Token;
 
 use crate::search::build_dfa;
-use crate::search::query_tree::{Operation, Query};
 
 type IsPrefix = bool;
 
@@ -14,83 +14,129 @@ type IsPrefix = bool;
 /// referencing words that match the given query tree.
 #[derive(Default)]
 pub struct MatchingWords {
-    dfas: Vec<(DFA, String, u8, IsPrefix, usize)>,
+    inner: Vec<(Vec<MatchingWord>, Vec<PrimitiveWordId>)>,
 }
 
 impl MatchingWords {
-    pub fn from_query_tree(tree: &Operation) -> Self {
-        // fetch matchable words from the query tree
-        let mut dfas: Vec<_> = fetch_queries(tree)
-            .into_iter()
-            // create DFAs for each word
-            .map(|((w, t, p), id)| (build_dfa(w, t, p), w.to_string(), t, p, id))
-            .collect();
-        // Sort word by len in DESC order prioritizing the longuest word,
+    pub fn new(mut matching_words: Vec<(Vec<MatchingWord>, Vec<PrimitiveWordId>)>) -> Self {
+        // Sort word by len in DESC order prioritizing the longuest matches,
         // in order to highlight the longuest part of the matched word.
-        dfas.sort_unstable_by_key(|(_dfa, query_word, _typo, _is_prefix, _id)| {
-            Reverse(query_word.len())
-        });
-        Self { dfas }
+        matching_words.sort_unstable_by_key(|(mw, _)| Reverse((mw.len(), mw[0].word.len())));
+        Self { inner: matching_words }
     }
 
-    /// Returns the number of matching bytes if the word matches one of the query words.
-    pub fn matching_bytes(&self, word_to_highlight: &Token) -> Option<usize> {
-        self.matching_bytes_with_id(word_to_highlight).map(|(len, _)| len)
-    }
-
-    pub fn matching_bytes_with_id(&self, word_to_highlight: &Token) -> Option<(usize, usize)> {
-        self.dfas.iter().find_map(|(dfa, query_word, typo, is_prefix, id)| {
-            match dfa.eval(word_to_highlight.text()) {
-                Distance::Exact(t) if t <= *typo => {
-                    if *is_prefix {
-                        let len = bytes_to_highlight(word_to_highlight.text(), query_word);
-                        Some((word_to_highlight.num_chars_from_bytes(len), *id))
-                    } else {
-                        Some((
-                            word_to_highlight.num_chars_from_bytes(word_to_highlight.text().len()),
-                            *id,
-                        ))
-                    }
-                }
-                _otherwise => None,
-            }
-        })
-    }
-}
-
-/// Lists all words which can be considered as a match for the query tree.
-fn fetch_queries(tree: &Operation) -> HashMap<(&str, u8, IsPrefix), usize> {
-    fn resolve_ops<'a>(
-        tree: &'a Operation,
-        out: &mut HashMap<(&'a str, u8, IsPrefix), usize>,
-        id: &mut usize,
-    ) {
-        match tree {
-            Operation::Or(_, ops) | Operation::And(ops) => {
-                ops.as_slice().iter().for_each(|op| resolve_ops(op, out, id));
-            }
-            Operation::Query(Query { prefix, kind }) => {
-                let typo = if kind.is_exact() { 0 } else { kind.typo() };
-                out.entry((kind.word(), typo, *prefix)).or_insert_with(|| {
-                    *id += 1;
-                    *id
-                });
-            }
-            Operation::Phrase(words) => {
-                for word in words {
-                    out.entry((word, 0, false)).or_insert_with(|| {
-                        *id += 1;
-                        *id
-                    });
-                }
-            }
-        }
-    }
-
-    let mut queries = HashMap::new();
-    let mut id = 0;
-    resolve_ops(tree, &mut queries, &mut id);
-    queries
+    pub fn match_token<'a, 'b>(&'a self, token: &'b Token<'b>) -> MatchesIter<'a, 'b> {
+        MatchesIter { inner: Box::new(self.inner.iter()), token }
+    }
+}
+
+pub struct MatchesIter<'a, 'b> {
+    inner: Box<dyn Iterator<Item = &'a (Vec<MatchingWord>, Vec<PrimitiveWordId>)> + 'a>,
+    token: &'b Token<'b>,
+}
+
+impl<'a> Iterator for MatchesIter<'a, '_> {
+    type Item = MatchType<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.inner.next() {
+            Some((matching_words, ids)) => match matching_words[0].match_token(&self.token) {
+                Some(char_len) => {
+                    if matching_words.len() > 1 {
+                        Some(MatchType::Partial(PartialMatch {
+                            matching_words: &matching_words[1..],
+                            ids,
+                            char_len,
+                        }))
+                    } else {
+                        Some(MatchType::Full { char_len, ids })
+                    }
+                }
+                None => self.next(),
+            },
+            None => None,
+        }
+    }
+}
+
+pub type PrimitiveWordId = u8;
+pub struct MatchingWord {
+    pub dfa: DFA,
+    pub word: String,
+    pub typo: u8,
+    pub prefix: IsPrefix,
+}
+
+impl fmt::Debug for MatchingWord {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("MatchingWord")
+            .field("word", &self.word)
+            .field("typo", &self.typo)
+            .field("prefix", &self.prefix)
+            .finish()
+    }
+}
+
+impl PartialEq for MatchingWord {
+    fn eq(&self, other: &Self) -> bool {
+        self.prefix == other.prefix && self.typo == other.typo && self.word == other.word
+    }
+}
+
+impl MatchingWord {
+    pub fn new(word: String, typo: u8, prefix: IsPrefix) -> Self {
+        let dfa = build_dfa(&word, typo, prefix);
+
+        Self { dfa, word, typo, prefix }
+    }
+
+    pub fn match_token(&self, token: &Token) -> Option<usize> {
+        match self.dfa.eval(token.text()) {
+            Distance::Exact(t) if t <= self.typo => {
+                if self.prefix {
+                    let len = bytes_to_highlight(token.text(), &self.word);
+                    Some(token.num_chars_from_bytes(len))
+                } else {
+                    Some(token.num_chars_from_bytes(token.text().len()))
+                }
+            }
+            _otherwise => None,
+        }
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub enum MatchType<'a> {
+    Full { char_len: usize, ids: &'a [PrimitiveWordId] },
+    Partial(PartialMatch<'a>),
+}
+
+#[derive(Debug, PartialEq)]
+pub struct PartialMatch<'a> {
+    matching_words: &'a [MatchingWord],
+    ids: &'a [PrimitiveWordId],
+    char_len: usize,
+}
+
+impl<'a> PartialMatch<'a> {
+    pub fn match_token(self, token: &Token) -> Option<MatchType<'a>> {
+        self.matching_words[0].match_token(token).map(|char_len| {
+            if self.matching_words.len() > 1 {
+                MatchType::Partial(PartialMatch {
+                    matching_words: &self.matching_words[1..],
+                    ids: self.ids,
+                    char_len,
+                })
+            } else {
+                MatchType::Full { char_len, ids: self.ids }
+            }
+        })
+    }
+
+    pub fn char_len(&self) -> usize {
+        self.char_len
+    }
 }
 
 // A simple wrapper around vec so we can get contiguous but index it like it's 2D array.
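Reading aid, not part of the commit: a minimal, self-contained sketch of the data model this hunk introduces. The names are illustrative stand-ins and plain string equality replaces the crate's typo-tolerant DFAs, but the descending sort and the full/partial distinction mirror the patch.

    use std::cmp::Reverse;

    // Stand-ins for MatchingWord / PrimitiveWordId (no DFA, exact match only).
    type Word = String;
    type Id = u8;

    struct Terms {
        inner: Vec<(Vec<Word>, Vec<Id>)>,
    }

    impl Terms {
        fn new(mut inner: Vec<(Vec<Word>, Vec<Id>)>) -> Self {
            // Same ordering rule as the patch: longer word sequences first,
            // then longer first words, so the longest candidate is tried first.
            inner.sort_unstable_by_key(|(ws, _)| Reverse((ws.len(), ws[0].len())));
            Self { inner }
        }

        // Yields (is_full, ids) for every term whose first word matches `token`;
        // `false` plays the role of MatchType::Partial in the patch.
        fn match_token<'a>(&'a self, token: &'a str) -> impl Iterator<Item = (bool, &'a [Id])> {
            self.inner
                .iter()
                .filter(move |(ws, _)| ws[0] == token)
                .map(|(ws, ids)| (ws.len() == 1, &ids[..]))
        }
    }

    fn main() {
        let terms = Terms::new(vec![
            // A split word needs two consecutive tokens, hence two ids.
            (vec!["new".into(), "york".into()], vec![0, 1]),
            (vec!["york".into()], vec![1]),
        ]);
        for (full, ids) in terms.match_token("new") {
            println!("full: {full}, ids: {ids:?}");
        }
    }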
@@ -203,7 +249,6 @@ mod tests {
     use meilisearch_tokenizer::TokenKind;
 
     use super::*;
-    use crate::search::query_tree::{Operation, Query, QueryKind};
     use crate::MatchingWords;
 
     #[test]
@@ -271,102 +316,104 @@ mod tests {
 
     #[test]
     fn matching_words() {
-        let query_tree = Operation::Or(
-            false,
-            vec![Operation::And(vec![
-                Operation::Query(Query {
-                    prefix: true,
-                    kind: QueryKind::exact("split".to_string()),
-                }),
-                Operation::Query(Query {
-                    prefix: false,
-                    kind: QueryKind::exact("this".to_string()),
-                }),
-                Operation::Query(Query {
-                    prefix: true,
-                    kind: QueryKind::tolerant(1, "world".to_string()),
-                }),
-            ])],
-        );
+        let matching_words = vec![
+            (vec![MatchingWord::new("split".to_string(), 1, true)], vec![0]),
+            (vec![MatchingWord::new("this".to_string(), 0, false)], vec![1]),
+            (vec![MatchingWord::new("world".to_string(), 1, true)], vec![2]),
+        ];
 
-        let matching_words = MatchingWords::from_query_tree(&query_tree);
+        let matching_words = MatchingWords::new(matching_words);
 
-        assert_eq!(
-            matching_words.matching_bytes(&Token {
-                kind: TokenKind::Word,
-                word: Cow::Borrowed("word"),
-                byte_start: 0,
-                char_index: 0,
-                byte_end: "word".len(),
-                char_map: None,
-            }),
-            Some(3)
-        );
+        assert_eq!(
+            matching_words
+                .match_token(&Token {
+                    kind: TokenKind::Word,
+                    word: Cow::Borrowed("word"),
+                    byte_start: 0,
+                    char_index: 0,
+                    byte_end: "word".len(),
+                    char_map: None,
+                })
+                .next(),
+            Some(MatchType::Full { char_len: 3, ids: &[2] })
+        );
-        assert_eq!(
-            matching_words.matching_bytes(&Token {
-                kind: TokenKind::Word,
-                word: Cow::Borrowed("nyc"),
-                byte_start: 0,
-                char_index: 0,
-                byte_end: "nyc".len(),
-                char_map: None,
-            }),
-            None
-        );
+        assert_eq!(
+            matching_words
+                .match_token(&Token {
+                    kind: TokenKind::Word,
+                    word: Cow::Borrowed("nyc"),
+                    byte_start: 0,
+                    char_index: 0,
+                    byte_end: "nyc".len(),
+                    char_map: None,
+                })
+                .next(),
+            None
+        );
-        assert_eq!(
-            matching_words.matching_bytes(&Token {
-                kind: TokenKind::Word,
-                word: Cow::Borrowed("world"),
-                byte_start: 0,
-                char_index: 0,
-                byte_end: "world".len(),
-                char_map: None,
-            }),
-            Some(5)
-        );
+        assert_eq!(
+            matching_words
+                .match_token(&Token {
+                    kind: TokenKind::Word,
+                    word: Cow::Borrowed("world"),
+                    byte_start: 0,
+                    char_index: 0,
+                    byte_end: "world".len(),
+                    char_map: None,
+                })
+                .next(),
+            Some(MatchType::Full { char_len: 5, ids: &[2] })
+        );
-        assert_eq!(
-            matching_words.matching_bytes(&Token {
-                kind: TokenKind::Word,
-                word: Cow::Borrowed("splitted"),
-                byte_start: 0,
-                char_index: 0,
-                byte_end: "splitted".len(),
-                char_map: None,
-            }),
-            Some(5)
-        );
+        assert_eq!(
+            matching_words
+                .match_token(&Token {
+                    kind: TokenKind::Word,
+                    word: Cow::Borrowed("splitted"),
+                    byte_start: 0,
+                    char_index: 0,
+                    byte_end: "splitted".len(),
+                    char_map: None,
+                })
+                .next(),
+            Some(MatchType::Full { char_len: 5, ids: &[0] })
+        );
-        assert_eq!(
-            matching_words.matching_bytes(&Token {
-                kind: TokenKind::Word,
-                word: Cow::Borrowed("thisnew"),
-                byte_start: 0,
-                char_index: 0,
-                byte_end: "thisnew".len(),
-                char_map: None,
-            }),
-            None
-        );
+        assert_eq!(
+            matching_words
+                .match_token(&Token {
+                    kind: TokenKind::Word,
+                    word: Cow::Borrowed("thisnew"),
+                    byte_start: 0,
+                    char_index: 0,
+                    byte_end: "thisnew".len(),
+                    char_map: None,
+                })
+                .next(),
+            None
+        );
-        assert_eq!(
-            matching_words.matching_bytes(&Token {
-                kind: TokenKind::Word,
-                word: Cow::Borrowed("borld"),
-                byte_start: 0,
-                char_index: 0,
-                byte_end: "borld".len(),
-                char_map: None,
-            }),
-            Some(5)
-        );
+        assert_eq!(
+            matching_words
+                .match_token(&Token {
+                    kind: TokenKind::Word,
+                    word: Cow::Borrowed("borld"),
+                    byte_start: 0,
+                    char_index: 0,
+                    byte_end: "borld".len(),
+                    char_map: None,
+                })
+                .next(),
+            Some(MatchType::Full { char_len: 5, ids: &[2] })
+        );
-        assert_eq!(
-            matching_words.matching_bytes(&Token {
-                kind: TokenKind::Word,
-                word: Cow::Borrowed("wordsplit"),
-                byte_start: 0,
-                char_index: 0,
-                byte_end: "wordsplit".len(),
-                char_map: None,
-            }),
-            Some(4)
-        );
+        assert_eq!(
+            matching_words
+                .match_token(&Token {
+                    kind: TokenKind::Word,
+                    word: Cow::Borrowed("wordsplit"),
+                    byte_start: 0,
+                    char_index: 0,
+                    byte_end: "wordsplit".len(),
+                    char_map: None,
+                })
+                .next(),
+            Some(MatchType::Full { char_len: 4, ids: &[2] })
+        );
     }
 }
@@ -1,11 +1,10 @@
 use std::borrow::Cow;
 
 pub use matching_words::MatchingWords;
+use matching_words::{MatchType, PrimitiveWordId};
 use meilisearch_tokenizer::token::{SeparatorKind, Token};
 
-use crate::search::query_tree::Operation;
-
-mod matching_words;
+pub mod matching_words;
 
 const DEFAULT_CROP_SIZE: usize = 10;
 const DEFAULT_CROP_MARKER: &'static str = "…";
@@ -21,18 +20,6 @@ pub struct MatcherBuilder {
 }
 
 impl MatcherBuilder {
-    pub fn from_query_tree(query_tree: &Operation) -> Self {
-        let matching_words = MatchingWords::from_query_tree(query_tree);
-
-        Self {
-            matching_words,
-            crop_size: DEFAULT_CROP_SIZE,
-            crop_marker: None,
-            highlight_prefix: None,
-            highlight_suffix: None,
-        }
-    }
-
     pub fn from_matching_words(matching_words: MatchingWords) -> Self {
         Self {
             matching_words,
@@ -93,8 +80,8 @@ impl MatcherBuilder {
 #[derive(Clone, Debug)]
 pub struct Match {
     match_len: usize,
-    // id of the query word that matches.
-    id: usize,
+    // ids of the query words that matches.
+    ids: Vec<PrimitiveWordId>,
     // position of the word in the whole text.
     word_position: usize,
     // position of the token in the whole text.
@@ -123,10 +110,72 @@ impl<'t> Matcher<'t, '_> {
         let mut matches = Vec::new();
         let mut word_position = 0;
         let mut token_position = 0;
-        for token in self.tokens {
+        while let Some(token) = self.tokens.get(token_position) {
             if token.is_separator().is_none() {
-                if let Some((match_len, id)) = self.matching_words.matching_bytes_with_id(&token) {
-                    matches.push(Match { match_len, id, word_position, token_position });
+                'matches: for match_type in self.matching_words.match_token(&token) {
+                    match match_type {
+                        MatchType::Full { char_len, ids } => {
+                            matches.push(Match {
+                                match_len: char_len,
+                                ids: ids.to_vec(),
+                                word_position,
+                                token_position,
+                            });
+                            // stop on the first match
+                            break;
+                        }
+                        MatchType::Partial(mut partial) => {
+                            let mut potential_matches =
+                                vec![(token_position, word_position, partial.char_len())];
+                            let mut t_position = 1;
+                            let mut w_position = 1;
+                            'partials: for token in &self.tokens[token_position + 1..] {
+                                if token.is_separator().is_none() {
+                                    partial = match partial.match_token(&token) {
+                                        Some(MatchType::Partial(partial)) => {
+                                            potential_matches.push((
+                                                token_position + t_position,
+                                                word_position + w_position,
+                                                partial.char_len(),
+                                            ));
+                                            partial
+                                        }
+                                        // partial match is now full, we keep this matches and we advance positions
+                                        Some(MatchType::Full { char_len, ids }) => {
+                                            let iter = potential_matches.into_iter().map(
+                                                |(token_position, word_position, match_len)| {
+                                                    Match {
+                                                        match_len,
+                                                        ids: ids.to_vec(),
+                                                        word_position,
+                                                        token_position,
+                                                    }
+                                                },
+                                            );
+
+                                            matches.extend(iter);
+
+                                            word_position += w_position;
+                                            token_position += t_position;
+
+                                            matches.push(Match {
+                                                match_len: char_len,
+                                                ids: ids.to_vec(),
+                                                word_position,
+                                                token_position,
+                                            });
+
+                                            break 'matches;
+                                        }
+                                        // no match, continue to next match.
+                                        None => break 'partials,
+                                    };
+                                    w_position += 1;
+                                }
+                                t_position += 1;
+                            }
+                        }
+                    }
                 }
             }
             word_position += 1;
         }
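Reading aid, not part of the commit: the essence of the partial-match walk above, as a standalone sketch (simplified types, exact equality instead of DFAs). A multi-word term produces matches only if every one of its words is found on consecutive tokens; positions recorded along the way stay merely potential until the last word matches.

    // Each term lists the words that must appear on consecutive tokens.
    fn match_consecutive(term: &[&str], tokens: &[&str], start: usize) -> Vec<usize> {
        let mut potential = Vec::new();
        for (offset, word) in term.iter().enumerate() {
            match tokens.get(start + offset) {
                // Word matches: remember the position, but emit nothing yet.
                Some(token) if token == word => potential.push(start + offset),
                // Chain broken: every potential match is dropped.
                _ => return Vec::new(),
            }
        }
        // The whole term matched: all recorded positions become real matches.
        potential
    }

    fn main() {
        let tokens = ["new", "york", "city"];
        // Mirrors the behaviour for a split word "newyork" -> ["new", "york"].
        assert_eq!(match_consecutive(&["new", "york"], &tokens, 0), vec![0, 1]);
        // "new" followed by "jersey" never completes, so position 0 is discarded.
        assert_eq!(match_consecutive(&["new", "jersey"], &tokens, 0), vec![]);
        println!("ok");
    }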
@@ -229,7 +278,7 @@ impl<'t> Matcher<'t, '_> {
     }
 
     fn match_interval_score(&self, matches: &[Match]) -> (i16, i16, i16) {
-        let mut ids = Vec::with_capacity(matches.len());
+        let mut ids: Vec<PrimitiveWordId> = Vec::with_capacity(matches.len());
         let mut order_score = 0;
         let mut distance_score = 0;
 
@@ -237,7 +286,7 @@ impl<'t> Matcher<'t, '_> {
         while let Some(m) = iter.next() {
             if let Some(next_match) = iter.peek() {
                 // if matches are ordered
-                if next_match.id > m.id {
+                if next_match.ids.iter().min() > m.ids.iter().min() {
                     order_score += 1;
                 }
 
@@ -245,7 +294,7 @@ impl<'t> Matcher<'t, '_> {
                 distance_score -= (next_match.word_position - m.word_position).min(7) as i16;
             }
 
-            ids.push(m.id);
+            ids.extend(m.ids.iter());
         }
 
         ids.sort_unstable();
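Reading aid, not part of the commit: with multi-word matches a single Match now carries several ids, so the ordering test above compares the smallest id on each side. A standalone sketch of just that comparison:

    fn ordered(prev_ids: &[u8], next_ids: &[u8]) -> bool {
        // Option<&u8> ordering: None < Some(_), so an empty slice never wins.
        next_ids.iter().min() > prev_ids.iter().min()
    }

    fn main() {
        assert!(ordered(&[0, 1], &[2, 3])); // [2, 3] comes after [0, 1]
        assert!(!ordered(&[2], &[0, 1])); // out of order
        assert!(!ordered(&[0], &[])); // empty side never counts as ordered
        println!("ok");
    }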
@@ -348,7 +397,8 @@ impl<'t> Matcher<'t, '_> {
                             .char_indices()
                             .enumerate()
                             .find(|(i, _)| *i == m.match_len)
-                            .map_or(token.byte_end, |(_, (i, _))| i + token.byte_start);
+                            .map_or(token.byte_end, |(_, (i, _))| i + token.byte_start)
+                            .min(token.byte_end);
                         formatted.push(self.highlight_prefix);
                         formatted.push(&self.text[token.byte_start..highlight_byte_index]);
                         formatted.push(self.highlight_suffix);
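Reading aid, not part of the commit: match_len counts characters while the highlight boundary is a byte offset, so the new .min(token.byte_end) clamp keeps the boundary inside the token when characters are wider than one byte. A standalone sketch (hypothetical highlight_end helper), reproducing the Westfáli/a boundary asserted in the updated test further down:

    fn highlight_end(text: &str, byte_start: usize, byte_end: usize, match_len: usize) -> usize {
        text[byte_start..]
            .char_indices()
            .enumerate()
            .find(|(i, _)| *i == match_len)
            .map_or(byte_end, |(_, (i, _))| i + byte_start)
            // Without this clamp the boundary could point past the token,
            // pulling bytes of the following token into the highlight.
            .min(byte_end)
    }

    fn main() {
        let text = "Westfália";
        // 8 matched characters cover 9 bytes here, because 'á' is 2 bytes long.
        assert_eq!(&text[..highlight_end(text, 0, text.len(), 8)], "Westfáli");
        // If match_len overruns the token, the boundary is clamped to its end.
        assert_eq!(highlight_end("foo bar", 0, 3, 5), 3);
        println!("ok");
    }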
@@ -386,33 +436,23 @@ mod tests {
     use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
 
     use super::*;
-    use crate::search::query_tree::{Query, QueryKind};
+    use crate::search::matches::matching_words::MatchingWord;
 
-    fn query_tree() -> Operation {
-        Operation::Or(
-            false,
-            vec![Operation::And(vec![
-                Operation::Query(Query {
-                    prefix: true,
-                    kind: QueryKind::exact("split".to_string()),
-                }),
-                Operation::Query(Query {
-                    prefix: false,
-                    kind: QueryKind::exact("the".to_string()),
-                }),
-                Operation::Query(Query {
-                    prefix: true,
-                    kind: QueryKind::tolerant(1, "world".to_string()),
-                }),
-            ])],
-        )
+    fn matching_words() -> MatchingWords {
+        let matching_words = vec![
+            (vec![MatchingWord::new("split".to_string(), 0, false)], vec![0]),
+            (vec![MatchingWord::new("the".to_string(), 0, false)], vec![1]),
+            (vec![MatchingWord::new("world".to_string(), 1, true)], vec![2]),
+        ];
+
+        MatchingWords::new(matching_words)
     }
 
     #[test]
     fn format_identity() {
-        let query_tree = query_tree();
+        let matching_words = matching_words();
 
-        let builder = MatcherBuilder::from_query_tree(&query_tree);
+        let builder = MatcherBuilder::from_matching_words(matching_words);
         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
 
         let highlight = false;
@@ -445,9 +485,9 @@ mod tests {
 
     #[test]
     fn format_highlight() {
-        let query_tree = query_tree();
+        let matching_words = matching_words();
 
-        let builder = MatcherBuilder::from_query_tree(&query_tree);
+        let builder = MatcherBuilder::from_matching_words(matching_words);
         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
 
         let highlight = true;
@@ -497,21 +537,14 @@ mod tests {
 
     #[test]
     fn highlight_unicode() {
-        let query_tree = Operation::Or(
-            false,
-            vec![Operation::And(vec![
-                Operation::Query(Query {
-                    prefix: true,
-                    kind: QueryKind::tolerant(1, "wessfalia".to_string()),
-                }),
-                Operation::Query(Query {
-                    prefix: true,
-                    kind: QueryKind::tolerant(1, "world".to_string()),
-                }),
-            ])],
-        );
+        let matching_words = vec![
+            (vec![MatchingWord::new("wessfali".to_string(), 1, true)], vec![0]),
+            (vec![MatchingWord::new("world".to_string(), 1, true)], vec![1]),
+        ];
 
-        let builder = MatcherBuilder::from_query_tree(&query_tree);
+        let matching_words = MatchingWords::new(matching_words);
+
+        let builder = MatcherBuilder::from_matching_words(matching_words);
         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
 
         let highlight = true;
@@ -539,14 +572,14 @@ mod tests {
         let tokens: Vec<_> = analyzed.tokens().collect();
         let mut matcher = builder.build(&tokens[..], text);
         // no crop should return complete text with highlighted matches.
-        assert_eq!(&matcher.format(highlight, crop), "<em>Westfália</em>");
+        assert_eq!(&matcher.format(highlight, crop), "<em>Westfáli</em>a");
     }
 
     #[test]
     fn format_crop() {
-        let query_tree = query_tree();
+        let matching_words = matching_words();
 
-        let builder = MatcherBuilder::from_query_tree(&query_tree);
+        let builder = MatcherBuilder::from_matching_words(matching_words);
         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
 
         let highlight = false;
@@ -657,9 +690,9 @@ mod tests {
 
     #[test]
     fn format_highlight_crop() {
-        let query_tree = query_tree();
+        let matching_words = matching_words();
 
-        let builder = MatcherBuilder::from_query_tree(&query_tree);
+        let builder = MatcherBuilder::from_matching_words(matching_words);
         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
 
         let highlight = true;
@@ -724,9 +757,9 @@ mod tests {
     #[test]
     fn smaller_crop_size() {
         //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
-        let query_tree = query_tree();
+        let matching_words = matching_words();
 
-        let mut builder = MatcherBuilder::from_query_tree(&query_tree);
+        let mut builder = MatcherBuilder::from_matching_words(matching_words);
         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
 
         let highlight = false;
@@ -114,7 +114,7 @@ impl<'a> Search<'a> {
     pub fn execute(&self) -> Result<SearchResult> {
         // We create the query tree by spliting the query into tokens.
         let before = Instant::now();
-        let (query_tree, primitive_query) = match self.query.as_ref() {
+        let (query_tree, primitive_query, matching_words) = match self.query.as_ref() {
             Some(query) => {
                 let mut builder = QueryTreeBuilder::new(self.rtxn, self.index);
                 builder.optional_words(self.optional_words);
@@ -132,9 +132,11 @@ impl<'a> Search<'a> {
                 let analyzer = Analyzer::new(config);
                 let result = analyzer.analyze(query);
                 let tokens = result.tokens();
-                builder.build(tokens)?.map_or((None, None), |(qt, pq)| (Some(qt), Some(pq)))
+                builder
+                    .build(tokens)?
+                    .map_or((None, None, None), |(qt, pq, mw)| (Some(qt), Some(pq), Some(mw)))
             }
-            None => (None, None),
+            None => (None, None, None),
         };
 
         debug!("query tree: {:?} took {:.02?}", query_tree, before.elapsed());
@@ -148,11 +150,6 @@ impl<'a> Search<'a> {
 
         debug!("facet candidates: {:?} took {:.02?}", filtered_candidates, before.elapsed());
 
-        let matching_words = match query_tree.as_ref() {
-            Some(query_tree) => MatchingWords::from_query_tree(&query_tree),
-            None => MatchingWords::default(),
-        };
-
         // We check that we are allowed to use the sort criteria, we check
         // that they are declared in the sortable fields.
         if let Some(sort_criteria) = &self.sort_criteria {
@@ -193,13 +190,13 @@ impl<'a> Search<'a> {
         )?;
 
         match self.index.distinct_field(self.rtxn)? {
-            None => self.perform_sort(NoopDistinct, matching_words, criteria),
+            None => self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria),
             Some(name) => {
                 let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
                 match field_ids_map.id(name) {
                     Some(fid) => {
                         let distinct = FacetDistinct::new(fid, self.index, self.rtxn);
-                        self.perform_sort(distinct, matching_words, criteria)
+                        self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria)
                     }
                     None => Ok(SearchResult::default()),
                 }
@@ -8,7 +8,8 @@ use meilisearch_tokenizer::TokenKind;
 use roaring::RoaringBitmap;
 use slice_group_by::GroupBy;
 
-use crate::{Index, Result};
+use crate::search::matches::matching_words::{MatchingWord, PrimitiveWordId};
+use crate::{Index, MatchingWords, Result};
 
 type IsOptionalWord = bool;
 type IsPrefix = bool;
@@ -233,7 +234,10 @@ impl<'a> QueryTreeBuilder<'a> {
     /// - if `authorize_typos` is set to `false` the query tree will be generated
    /// forcing all query words to match documents without any typo
     /// (the criterion `typo` will be ignored)
-    pub fn build(&self, query: TokenStream) -> Result<Option<(Operation, PrimitiveQuery)>> {
+    pub fn build(
+        &self,
+        query: TokenStream,
+    ) -> Result<Option<(Operation, PrimitiveQuery, MatchingWords)>> {
         let stop_words = self.index.stop_words(self.rtxn)?;
         let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
         if !primitive_query.is_empty() {
@@ -243,7 +247,9 @@ impl<'a> QueryTreeBuilder<'a> {
                 self.authorize_typos,
                 &primitive_query,
             )?;
-            Ok(Some((qt, primitive_query)))
+            let matching_words =
+                create_matching_words(self, self.authorize_typos, &primitive_query)?;
+            Ok(Some((qt, primitive_query, matching_words)))
         } else {
             Ok(None)
         }
@@ -251,7 +257,7 @@ impl<'a> QueryTreeBuilder<'a> {
 }
 
 /// Split the word depending on the frequency of subwords in the database documents.
-fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<Operation>> {
+fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<(String, String)>> {
     let chars = word.char_indices().skip(1);
     let mut best = None;
 
@@ -267,7 +273,7 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<Operation>> {
         }
     }
 
-    Ok(best.map(|(_, left, right)| Operation::Phrase(vec![left.to_string(), right.to_string()])))
+    Ok(best.map(|(_, left, right)| (left.to_string(), right.to_string())))
 }
 
 #[derive(Clone)]
@@ -336,8 +342,8 @@ fn create_query_tree(
             // 4. wrap all in an OR operation
             PrimitiveQueryPart::Word(word, prefix) => {
                 let mut children = synonyms(ctx, &[&word])?.unwrap_or_default();
-                if let Some(child) = split_best_frequency(ctx, &word)? {
-                    children.push(child);
+                if let Some((left, right)) = split_best_frequency(ctx, &word)? {
+                    children.push(Operation::Phrase(vec![left, right]));
                 }
                 let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?;
                 let exact_words = ctx.exact_words()?;
@@ -464,6 +470,154 @@ fn create_query_tree(
     }
 }
 
+/// Main function that matchings words used for crop and highlight.
+fn create_matching_words(
+    ctx: &impl Context,
+    authorize_typos: bool,
+    query: &[PrimitiveQueryPart],
+) -> Result<MatchingWords> {
+    /// Matches on the `PrimitiveQueryPart` and create matchings words from it.
+    fn resolve_primitive_part(
+        ctx: &impl Context,
+        authorize_typos: bool,
+        part: PrimitiveQueryPart,
+        matching_words: &mut Vec<(Vec<MatchingWord>, Vec<PrimitiveWordId>)>,
+        id: PrimitiveWordId,
+    ) -> Result<()> {
+        match part {
+            // 1. try to split word in 2
+            // 2. try to fetch synonyms
+            PrimitiveQueryPart::Word(word, prefix) => {
+                if let Some(synonyms) = ctx.synonyms(&[word.as_str()])? {
+                    for synonym in synonyms {
+                        let synonym = synonym
+                            .into_iter()
+                            .map(|syn| MatchingWord::new(syn.to_string(), 0, false))
+                            .collect();
+                        matching_words.push((synonym, vec![id]));
+                    }
+                }
+
+                if let Some((left, right)) = split_best_frequency(ctx, &word)? {
+                    let left = MatchingWord::new(left, 0, false);
+                    let right = MatchingWord::new(right, 0, false);
+                    matching_words.push((vec![left, right], vec![id]));
+                }
+
+                let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?;
+                let exact_words = ctx.exact_words()?;
+                let config =
+                    TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo, exact_words };
+
+                let matching_word = match typos(word, authorize_typos, config) {
+                    QueryKind::Exact { word, .. } => MatchingWord::new(word, 0, prefix),
+                    QueryKind::Tolerant { typo, word } => MatchingWord::new(word, typo, prefix),
+                };
+                matching_words.push((vec![matching_word], vec![id]));
+            }
+            // create a CONSECUTIVE matchings words wrapping all word in the phrase
+            PrimitiveQueryPart::Phrase(words) => {
+                let ids: Vec<_> =
+                    (0..words.len()).into_iter().map(|i| id + i as PrimitiveWordId).collect();
+                let words =
+                    words.into_iter().map(|w| MatchingWord::new(w.to_string(), 0, false)).collect();
+                matching_words.push((words, ids));
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Create all ngrams 1..=3 generating query tree branches.
+    fn ngrams(
+        ctx: &impl Context,
+        authorize_typos: bool,
+        query: &[PrimitiveQueryPart],
+        matching_words: &mut Vec<(Vec<MatchingWord>, Vec<PrimitiveWordId>)>,
+        mut id: PrimitiveWordId,
+    ) -> Result<()> {
+        const MAX_NGRAM: usize = 3;
+
+        for sub_query in query.linear_group_by(|a, b| !(a.is_phrase() || b.is_phrase())) {
+            for ngram in 1..=MAX_NGRAM.min(sub_query.len()) {
+                if let Some(group) = sub_query.get(..ngram) {
+                    let tail = &sub_query[ngram..];
+                    let is_last = tail.is_empty();
+
+                    match group {
+                        [part] => {
+                            resolve_primitive_part(
+                                ctx,
+                                authorize_typos,
+                                part.clone(),
+                                matching_words,
+                                id,
+                            )?;
+                        }
+                        words => {
+                            let is_prefix = words.last().map_or(false, |part| part.is_prefix());
+                            let words: Vec<_> = words
+                                .iter()
+                                .filter_map(|part| {
+                                    if let PrimitiveQueryPart::Word(word, _) = part {
+                                        Some(word.as_str())
+                                    } else {
+                                        None
+                                    }
+                                })
+                                .collect();
+                            let ids: Vec<_> = (0..words.len())
+                                .into_iter()
+                                .map(|i| id + i as PrimitiveWordId)
+                                .collect();
+
+                            if let Some(synonyms) = ctx.synonyms(&words)? {
+                                for synonym in synonyms {
+                                    let synonym = synonym
+                                        .into_iter()
+                                        .map(|syn| MatchingWord::new(syn.to_string(), 0, false))
+                                        .collect();
+                                    matching_words.push((synonym, ids.clone()));
+                                }
+                            }
+                            let word = words.concat();
+                            let (word_len_one_typo, word_len_two_typo) =
+                                ctx.min_word_len_for_typo()?;
+                            let exact_words = ctx.exact_words()?;
+                            let config = TypoConfig {
+                                max_typos: 1,
+                                word_len_one_typo,
+                                word_len_two_typo,
+                                exact_words,
+                            };
+                            let matching_word = match typos(word, authorize_typos, config) {
+                                QueryKind::Exact { word, .. } => {
+                                    MatchingWord::new(word, 0, is_prefix)
+                                }
+                                QueryKind::Tolerant { typo, word } => {
+                                    MatchingWord::new(word, typo, is_prefix)
+                                }
+                            };
+                            matching_words.push((vec![matching_word], ids));
+                        }
+                    }
+
+                    if !is_last {
+                        ngrams(ctx, authorize_typos, tail, matching_words, id + 1)?;
+                    }
+                }
+            }
+            id += sub_query.iter().map(|x| x.len() as PrimitiveWordId).sum::<PrimitiveWordId>();
+        }
+
+        Ok(())
+    }
+
+    let mut matching_words = Vec::new();
+    ngrams(ctx, authorize_typos, query, &mut matching_words, 0)?;
+    Ok(MatchingWords::new(matching_words))
+}
+
 pub type PrimitiveQuery = Vec<PrimitiveQueryPart>;
 
 #[derive(Debug, Clone)]
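Reading aid, not part of the commit: the id bookkeeping in ngrams gives every query word one PrimitiveWordId, and a phrase consumes one id per word — which is what the PrimitiveQueryPart::len helper added in the next hunk computes. A toy sketch of that layout with a hypothetical Part enum:

    enum Part {
        Word(&'static str),
        Phrase(Vec<&'static str>),
    }

    impl Part {
        // Same shape as the new PrimitiveQueryPart::len.
        fn len(&self) -> usize {
            match self {
                Part::Phrase(words) => words.len(),
                Part::Word(_) => 1,
            }
        }
    }

    fn main() {
        let query = [Part::Word("new"), Part::Phrase(vec!["york", "city"]), Part::Word("subway")];
        let mut id: u8 = 0;
        for part in &query {
            let ids: Vec<u8> = (0..part.len()).map(|i| id + i as u8).collect();
            println!("{ids:?}"); // prints [0], then [1, 2], then [3]
            id += part.len() as u8;
        }
    }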
@@ -480,6 +634,13 @@ impl PrimitiveQueryPart {
     fn is_prefix(&self) -> bool {
         matches!(self, Self::Word(_, is_prefix) if *is_prefix)
     }
+
+    fn len(&self) -> usize {
+        match self {
+            Self::Phrase(words) => words.len(),
+            Self::Word(_, _) => 1,
+        }
+    }
 }
 
 /// Create primitive query from tokenized query string,