mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Update resolve_graph module to work with lazy query terms
This commit is contained in:
parent
d0f048c068
commit
b96a682f16
@ -28,12 +28,13 @@ pub use logger::{DefaultSearchLogger, SearchLogger};
|
|||||||
use query_graph::{QueryGraph, QueryNode, QueryNodeData};
|
use query_graph::{QueryGraph, QueryNode, QueryNodeData};
|
||||||
use query_term::{located_query_terms_from_string, Phrase, QueryTerm};
|
use query_term::{located_query_terms_from_string, Phrase, QueryTerm};
|
||||||
use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait};
|
use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait};
|
||||||
use resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache};
|
use resolve_query_graph::PhraseDocIdsCache;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use words::Words;
|
use words::Words;
|
||||||
|
|
||||||
use self::interner::Interner;
|
use self::interner::Interner;
|
||||||
use self::ranking_rules::{BoxRankingRule, RankingRule};
|
use self::ranking_rules::{BoxRankingRule, RankingRule};
|
||||||
|
use self::resolve_query_graph::compute_query_graph_docids;
|
||||||
use self::sort::Sort;
|
use self::sort::Sort;
|
||||||
use crate::{
|
use crate::{
|
||||||
AscDesc, Filter, Index, MatchingWords, Member, Result, SearchResult, TermsMatchingStrategy,
|
AscDesc, Filter, Index, MatchingWords, Member, Result, SearchResult, TermsMatchingStrategy,
|
||||||
@ -48,8 +49,7 @@ pub struct SearchContext<'ctx> {
|
|||||||
pub word_interner: DedupInterner<String>,
|
pub word_interner: DedupInterner<String>,
|
||||||
pub phrase_interner: DedupInterner<Phrase>,
|
pub phrase_interner: DedupInterner<Phrase>,
|
||||||
pub term_interner: Interner<QueryTerm>,
|
pub term_interner: Interner<QueryTerm>,
|
||||||
// think about memory usage of that field (roaring bitmaps in a hashmap)
|
pub phrase_docids: PhraseDocIdsCache,
|
||||||
pub term_docids: QueryTermDocIdsCache,
|
|
||||||
}
|
}
|
||||||
impl<'ctx> SearchContext<'ctx> {
|
impl<'ctx> SearchContext<'ctx> {
|
||||||
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
|
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
|
||||||
@ -60,7 +60,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word_interner: <_>::default(),
|
word_interner: <_>::default(),
|
||||||
phrase_interner: <_>::default(),
|
phrase_interner: <_>::default(),
|
||||||
term_interner: <_>::default(),
|
term_interner: <_>::default(),
|
||||||
term_docids: <_>::default(),
|
phrase_docids: <_>::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -103,7 +103,7 @@ fn resolve_maximally_reduced_query_graph(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
logger.query_for_universe(&graph);
|
logger.query_for_universe(&graph);
|
||||||
let docids = resolve_query_graph(ctx, &graph, universe)?;
|
let docids = compute_query_graph_docids(ctx, &graph, universe)?;
|
||||||
|
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
@ -319,7 +319,7 @@ pub fn execute_search(
|
|||||||
let tokens = tokenizer.tokenize(query);
|
let tokens = tokenizer.tokenize(query);
|
||||||
|
|
||||||
let query_terms = located_query_terms_from_string(ctx, tokens, words_limit)?;
|
let query_terms = located_query_terms_from_string(ctx, tokens, words_limit)?;
|
||||||
let graph = QueryGraph::from_query(ctx, query_terms)?;
|
let graph = QueryGraph::from_query(ctx, &query_terms)?;
|
||||||
|
|
||||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||||
|
|
||||||
|
@ -3,106 +3,63 @@
|
|||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
use fxhash::FxHashMap;
|
use fxhash::FxHashMap;
|
||||||
use heed::{BytesDecode, RoTxn};
|
use heed::BytesDecode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::db_cache::DatabaseCache;
|
use super::interner::Interned;
|
||||||
use super::interner::{DedupInterner, Interned};
|
|
||||||
use super::query_graph::QueryNodeData;
|
use super::query_graph::QueryNodeData;
|
||||||
use super::query_term::{Phrase, QueryTerm};
|
use super::query_term::{Phrase, QueryTermSubset};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, SearchContext};
|
use super::{QueryGraph, SearchContext};
|
||||||
use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec};
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
|
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct QueryTermDocIdsCache {
|
pub struct PhraseDocIdsCache {
|
||||||
pub phrases: FxHashMap<Interned<Phrase>, RoaringBitmap>,
|
pub cache: FxHashMap<Interned<Phrase>, RoaringBitmap>,
|
||||||
pub terms: FxHashMap<Interned<QueryTerm>, RoaringBitmap>,
|
|
||||||
}
|
}
|
||||||
impl QueryTermDocIdsCache {
|
impl<'ctx> SearchContext<'ctx> {
|
||||||
/// Get the document ids associated with the given phrase
|
/// Get the document ids associated with the given phrase
|
||||||
pub fn get_phrase_docids<'s, 'ctx>(
|
pub fn get_phrase_docids(&mut self, phrase: Interned<Phrase>) -> Result<&RoaringBitmap> {
|
||||||
&'s mut self,
|
if self.phrase_docids.cache.contains_key(&phrase) {
|
||||||
index: &Index,
|
return Ok(&self.phrase_docids.cache[&phrase]);
|
||||||
txn: &'ctx RoTxn,
|
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
|
||||||
word_interner: &DedupInterner<String>,
|
|
||||||
phrase_interner: &DedupInterner<Phrase>,
|
|
||||||
phrase: Interned<Phrase>,
|
|
||||||
) -> Result<&'s RoaringBitmap> {
|
|
||||||
if self.phrases.contains_key(&phrase) {
|
|
||||||
return Ok(&self.phrases[&phrase]);
|
|
||||||
};
|
};
|
||||||
let docids = resolve_phrase(index, txn, db_cache, word_interner, phrase_interner, phrase)?;
|
let docids = compute_phrase_docids(self, phrase)?;
|
||||||
let _ = self.phrases.insert(phrase, docids);
|
let _ = self.phrase_docids.cache.insert(phrase, docids);
|
||||||
let docids = &self.phrases[&phrase];
|
let docids = &self.phrase_docids.cache[&phrase];
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
/// Get the document ids associated with the given term
|
}
|
||||||
pub fn get_query_term_docids<'s, 'ctx>(
|
pub fn compute_query_term_subset_docids(
|
||||||
&'s mut self,
|
ctx: &mut SearchContext,
|
||||||
index: &Index,
|
term: &QueryTermSubset,
|
||||||
txn: &'ctx RoTxn,
|
) -> Result<RoaringBitmap> {
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
|
||||||
word_interner: &DedupInterner<String>,
|
|
||||||
term_interner: &DedupInterner<QueryTerm>,
|
|
||||||
phrase_interner: &DedupInterner<Phrase>,
|
|
||||||
term_interned: Interned<QueryTerm>,
|
|
||||||
) -> Result<&'s RoaringBitmap> {
|
|
||||||
if self.terms.contains_key(&term_interned) {
|
|
||||||
return Ok(&self.terms[&term_interned]);
|
|
||||||
};
|
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
// TODO: use a MultiOps?
|
for word in term.all_single_words_except_prefix_db(ctx)? {
|
||||||
let term = term_interner.get(term_interned);
|
if let Some(word_docids) = ctx.get_db_word_docids(word)? {
|
||||||
for word in term.all_single_words_except_prefix_db() {
|
docids |= RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?;
|
||||||
if let Some(word_docids) = db_cache.get_word_docids(index, txn, word_interner, word)? {
|
|
||||||
docids |=
|
|
||||||
RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for phrase in term.all_phrases() {
|
for phrase in term.all_phrases(ctx)? {
|
||||||
docids |= self.get_phrase_docids(
|
docids |= ctx.get_phrase_docids(phrase)?;
|
||||||
index,
|
|
||||||
txn,
|
|
||||||
db_cache,
|
|
||||||
word_interner,
|
|
||||||
phrase_interner,
|
|
||||||
phrase,
|
|
||||||
)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(prefix) = term.use_prefix_db {
|
if let Some(prefix) = term.use_prefix_db(ctx) {
|
||||||
if let Some(prefix_docids) =
|
if let Some(prefix_docids) = ctx.get_db_word_prefix_docids(prefix)? {
|
||||||
db_cache.get_word_prefix_docids(index, txn, word_interner, prefix)?
|
|
||||||
{
|
|
||||||
docids |=
|
docids |=
|
||||||
RoaringBitmapCodec::bytes_decode(prefix_docids).ok_or(heed::Error::Decoding)?;
|
RoaringBitmapCodec::bytes_decode(prefix_docids).ok_or(heed::Error::Decoding)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let _ = self.terms.insert(term_interned, docids);
|
|
||||||
let docids = &self.terms[&term_interned];
|
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_query_graph(
|
pub fn compute_query_graph_docids(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
q: &QueryGraph,
|
q: &QueryGraph,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let SearchContext {
|
// TODO: there must be a faster way to compute this big
|
||||||
index,
|
|
||||||
txn,
|
|
||||||
db_cache,
|
|
||||||
word_interner,
|
|
||||||
phrase_interner,
|
|
||||||
term_interner,
|
|
||||||
term_docids: query_term_docids,
|
|
||||||
..
|
|
||||||
} = ctx;
|
|
||||||
// TODO: there is a faster way to compute this big
|
|
||||||
// roaring bitmap expression
|
// roaring bitmap expression
|
||||||
|
|
||||||
let mut nodes_resolved = SmallBitmap::for_interned_values_in(&q.nodes);
|
let mut nodes_resolved = SmallBitmap::for_interned_values_in(&q.nodes);
|
||||||
@ -125,17 +82,13 @@ pub fn resolve_query_graph(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let node_docids = match &node.data {
|
let node_docids = match &node.data {
|
||||||
QueryNodeData::Term(located_term) => {
|
QueryNodeData::Term(LocatedQueryTermSubset {
|
||||||
let term_docids = query_term_docids.get_query_term_docids(
|
term_subset,
|
||||||
index,
|
positions: _,
|
||||||
txn,
|
term_ids: _,
|
||||||
db_cache,
|
}) => {
|
||||||
word_interner,
|
let phrase_docids = compute_query_term_subset_docids(ctx, term_subset)?;
|
||||||
term_interner,
|
predecessors_docids & phrase_docids
|
||||||
phrase_interner,
|
|
||||||
located_term.value,
|
|
||||||
)?;
|
|
||||||
predecessors_docids & term_docids
|
|
||||||
}
|
}
|
||||||
QueryNodeData::Deleted => {
|
QueryNodeData::Deleted => {
|
||||||
panic!()
|
panic!()
|
||||||
@ -163,15 +116,11 @@ pub fn resolve_query_graph(
|
|||||||
panic!()
|
panic!()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_phrase<'ctx>(
|
pub fn compute_phrase_docids(
|
||||||
index: &Index,
|
ctx: &mut SearchContext,
|
||||||
txn: &'ctx RoTxn,
|
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
|
||||||
word_interner: &DedupInterner<String>,
|
|
||||||
phrase_interner: &DedupInterner<Phrase>,
|
|
||||||
phrase: Interned<Phrase>,
|
phrase: Interned<Phrase>,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let Phrase { words } = phrase_interner.get(phrase).clone();
|
let Phrase { words } = ctx.phrase_interner.get(phrase).clone();
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
let mut first_iter = true;
|
let mut first_iter = true;
|
||||||
let winsize = words.len().min(3);
|
let winsize = words.len().min(3);
|
||||||
@ -195,14 +144,7 @@ pub fn resolve_phrase<'ctx>(
|
|||||||
.filter_map(|(index, word)| word.as_ref().map(|word| (index, word)))
|
.filter_map(|(index, word)| word.as_ref().map(|word| (index, word)))
|
||||||
{
|
{
|
||||||
if dist == 0 {
|
if dist == 0 {
|
||||||
match db_cache.get_word_pair_proximity_docids(
|
match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? {
|
||||||
index,
|
|
||||||
txn,
|
|
||||||
word_interner,
|
|
||||||
s1,
|
|
||||||
s2,
|
|
||||||
1,
|
|
||||||
)? {
|
|
||||||
Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?),
|
Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?),
|
||||||
// If there are no documents for this pair, there will be no
|
// If there are no documents for this pair, there will be no
|
||||||
// results for the phrase query.
|
// results for the phrase query.
|
||||||
@ -211,14 +153,9 @@ pub fn resolve_phrase<'ctx>(
|
|||||||
} else {
|
} else {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
for dist in 0..=dist {
|
for dist in 0..=dist {
|
||||||
if let Some(m) = db_cache.get_word_pair_proximity_docids(
|
if let Some(m) =
|
||||||
index,
|
ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)?
|
||||||
txn,
|
{
|
||||||
word_interner,
|
|
||||||
s1,
|
|
||||||
s2,
|
|
||||||
dist as u8 + 1,
|
|
||||||
)? {
|
|
||||||
bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?;
|
bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::query_graph::QueryNodeData;
|
use super::query_graph::QueryNodeData;
|
||||||
use super::resolve_query_graph::resolve_query_graph;
|
use super::resolve_query_graph::compute_query_graph_docids;
|
||||||
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||||
use crate::{Result, TermsMatchingStrategy};
|
use crate::{Result, TermsMatchingStrategy};
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
|
|||||||
|
|
||||||
logger.log_words_state(query_graph);
|
logger.log_words_state(query_graph);
|
||||||
|
|
||||||
let this_bucket = resolve_query_graph(ctx, query_graph, universe)?;
|
let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?;
|
||||||
|
|
||||||
let child_query_graph = query_graph.clone();
|
let child_query_graph = query_graph.clone();
|
||||||
loop {
|
loop {
|
||||||
|
Loading…
Reference in New Issue
Block a user