2023-03-06 03:07:37 +08:00
|
|
|
mod db_cache;
|
|
|
|
mod graph_based_ranking_rule;
|
2023-03-07 02:21:55 +08:00
|
|
|
mod interner;
|
2023-03-06 03:07:37 +08:00
|
|
|
mod logger;
|
|
|
|
mod query_graph;
|
|
|
|
mod query_term;
|
|
|
|
mod ranking_rule_graph;
|
|
|
|
mod ranking_rules;
|
|
|
|
mod resolve_query_graph;
|
2023-03-07 21:42:58 +08:00
|
|
|
mod small_bitmap;
|
2023-03-06 03:07:37 +08:00
|
|
|
mod sort;
|
|
|
|
mod words;
|
2023-02-21 16:41:58 +08:00
|
|
|
|
2023-03-07 21:42:58 +08:00
|
|
|
use self::interner::Interner;
|
|
|
|
use self::logger::SearchLogger;
|
|
|
|
use self::query_term::Phrase;
|
|
|
|
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
|
|
|
|
use crate::new::query_term::located_query_terms_from_string;
|
|
|
|
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
2023-03-06 15:35:01 +08:00
|
|
|
use charabia::Tokenize;
|
|
|
|
use db_cache::DatabaseCache;
|
|
|
|
use heed::RoTxn;
|
|
|
|
use query_graph::{QueryGraph, QueryNode};
|
2023-03-07 21:42:58 +08:00
|
|
|
pub use ranking_rules::{
|
|
|
|
apply_ranking_rules, RankingRule, RankingRuleOutput, RankingRuleOutputIter,
|
|
|
|
RankingRuleOutputIterWrapper, RankingRuleQueryTrait,
|
2023-03-06 15:35:01 +08:00
|
|
|
};
|
2023-03-07 21:42:58 +08:00
|
|
|
use roaring::RoaringBitmap;
|
|
|
|
use std::collections::BTreeSet;
|
2023-02-21 16:41:58 +08:00
|
|
|
|
|
|
|
pub enum BitmapOrAllRef<'s> {
|
|
|
|
Bitmap(&'s RoaringBitmap),
|
|
|
|
All,
|
|
|
|
}
|
|
|
|
|
2023-03-07 02:21:55 +08:00
|
|
|
pub struct SearchContext<'search> {
|
|
|
|
pub index: &'search Index,
|
|
|
|
pub txn: &'search RoTxn<'search>,
|
|
|
|
pub db_cache: DatabaseCache<'search>,
|
|
|
|
pub word_interner: Interner<String>,
|
|
|
|
pub phrase_interner: Interner<Phrase>,
|
|
|
|
pub node_docids_cache: NodeDocIdsCache,
|
|
|
|
}
|
|
|
|
impl<'search> SearchContext<'search> {
|
|
|
|
pub fn new(index: &'search Index, txn: &'search RoTxn<'search>) -> Self {
|
|
|
|
Self {
|
|
|
|
index,
|
|
|
|
txn,
|
|
|
|
db_cache: <_>::default(),
|
|
|
|
word_interner: <_>::default(),
|
|
|
|
phrase_interner: <_>::default(),
|
|
|
|
node_docids_cache: <_>::default(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-06 15:35:01 +08:00
|
|
|
#[allow(clippy::too_many_arguments)]
|
2023-03-07 02:21:55 +08:00
|
|
|
pub fn resolve_maximally_reduced_query_graph<'search>(
|
|
|
|
ctx: &mut SearchContext<'search>,
|
2023-03-06 15:35:01 +08:00
|
|
|
universe: &RoaringBitmap,
|
|
|
|
query_graph: &QueryGraph,
|
|
|
|
matching_strategy: TermsMatchingStrategy,
|
|
|
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
|
|
|
) -> Result<RoaringBitmap> {
|
|
|
|
let mut graph = query_graph.clone();
|
|
|
|
let mut positions_to_remove = match matching_strategy {
|
|
|
|
TermsMatchingStrategy::Last => {
|
|
|
|
let mut all_positions = BTreeSet::new();
|
|
|
|
for n in query_graph.nodes.iter() {
|
|
|
|
match n {
|
|
|
|
QueryNode::Term(term) => {
|
|
|
|
all_positions.extend(term.positions.clone().into_iter());
|
|
|
|
}
|
|
|
|
QueryNode::Deleted | QueryNode::Start | QueryNode::End => {}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
all_positions.into_iter().collect()
|
|
|
|
}
|
|
|
|
TermsMatchingStrategy::All => vec![],
|
|
|
|
};
|
|
|
|
// don't remove the first term
|
|
|
|
positions_to_remove.remove(0);
|
|
|
|
loop {
|
|
|
|
if positions_to_remove.is_empty() {
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
let position_to_remove = positions_to_remove.pop().unwrap();
|
|
|
|
let _ = graph.remove_words_at_position(position_to_remove);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
logger.query_for_universe(&graph);
|
2023-03-07 02:21:55 +08:00
|
|
|
let docids = resolve_query_graph(ctx, &graph, universe)?;
|
2023-03-06 15:35:01 +08:00
|
|
|
|
|
|
|
Ok(docids)
|
|
|
|
}
|
|
|
|
|
|
|
|
#[allow(clippy::too_many_arguments)]
|
2023-03-07 02:21:55 +08:00
|
|
|
pub fn execute_search<'search>(
|
|
|
|
ctx: &mut SearchContext<'search>,
|
2023-02-21 16:41:58 +08:00
|
|
|
query: &str,
|
2023-03-06 15:35:01 +08:00
|
|
|
filters: Option<Filter>,
|
|
|
|
from: usize,
|
|
|
|
length: usize,
|
|
|
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
|
|
|
) -> Result<Vec<u32>> {
|
2023-02-21 16:41:58 +08:00
|
|
|
assert!(!query.is_empty());
|
2023-03-07 21:42:58 +08:00
|
|
|
let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None)?;
|
2023-03-07 02:21:55 +08:00
|
|
|
let graph = QueryGraph::from_query(ctx, query_terms)?;
|
2023-03-06 15:35:01 +08:00
|
|
|
|
|
|
|
logger.initial_query(&graph);
|
|
|
|
|
|
|
|
let universe = if let Some(filters) = filters {
|
2023-03-07 02:21:55 +08:00
|
|
|
filters.evaluate(ctx.txn, ctx.index)?
|
2023-03-06 15:35:01 +08:00
|
|
|
} else {
|
2023-03-07 02:21:55 +08:00
|
|
|
ctx.index.documents_ids(ctx.txn)?
|
2023-03-06 15:35:01 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
let universe = resolve_maximally_reduced_query_graph(
|
2023-03-07 02:21:55 +08:00
|
|
|
ctx,
|
2023-03-06 15:35:01 +08:00
|
|
|
&universe,
|
|
|
|
&graph,
|
|
|
|
TermsMatchingStrategy::Last,
|
|
|
|
logger,
|
|
|
|
)?;
|
2023-03-07 21:42:58 +08:00
|
|
|
// TODO: create ranking rules here
|
2023-03-06 15:35:01 +08:00
|
|
|
|
|
|
|
logger.initial_universe(&universe);
|
|
|
|
|
2023-03-07 02:21:55 +08:00
|
|
|
apply_ranking_rules(ctx, &graph, &universe, from, length, logger)
|
2023-02-21 16:41:58 +08:00
|
|
|
}
|