2023-03-08 16:55:53 +08:00
|
|
|
use std::collections::BTreeSet;
|
|
|
|
|
|
|
|
use roaring::RoaringBitmap;
|
|
|
|
|
2023-02-22 22:34:37 +08:00
|
|
|
use super::logger::SearchLogger;
|
2023-03-14 23:37:47 +08:00
|
|
|
use super::query_graph::QueryNodeData;
|
2023-03-30 17:10:38 +08:00
|
|
|
use super::resolve_query_graph::compute_query_graph_docids;
|
2023-03-14 23:37:47 +08:00
|
|
|
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
2023-03-07 02:21:55 +08:00
|
|
|
use crate::{Result, TermsMatchingStrategy};
|
2023-02-21 16:49:25 +08:00
|
|
|
|
|
|
|
pub struct Words {
|
2023-03-21 17:44:40 +08:00
|
|
|
exhausted: bool, // TODO: remove
|
2023-02-21 16:49:25 +08:00
|
|
|
query_graph: Option<QueryGraph>,
|
2023-03-21 17:44:40 +08:00
|
|
|
iterating: bool, // TODO: remove
|
2023-02-21 16:49:25 +08:00
|
|
|
positions_to_remove: Vec<i8>,
|
|
|
|
terms_matching_strategy: TermsMatchingStrategy,
|
|
|
|
}
|
|
|
|
impl Words {
|
|
|
|
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
|
|
|
|
Self {
|
|
|
|
exhausted: true,
|
|
|
|
query_graph: None,
|
|
|
|
iterating: false,
|
|
|
|
positions_to_remove: vec![],
|
|
|
|
terms_matching_strategy,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-13 21:03:48 +08:00
|
|
|
impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
|
2023-02-22 22:34:37 +08:00
|
|
|
fn id(&self) -> String {
|
|
|
|
"words".to_owned()
|
|
|
|
}
|
2023-02-21 16:49:25 +08:00
|
|
|
fn start_iteration(
|
|
|
|
&mut self,
|
2023-03-13 21:03:48 +08:00
|
|
|
_ctx: &mut SearchContext<'ctx>,
|
2023-02-28 18:49:24 +08:00
|
|
|
_logger: &mut dyn SearchLogger<QueryGraph>,
|
|
|
|
_parent_candidates: &RoaringBitmap,
|
2023-02-21 16:49:25 +08:00
|
|
|
parent_query_graph: &QueryGraph,
|
|
|
|
) -> Result<()> {
|
|
|
|
self.exhausted = false;
|
|
|
|
self.query_graph = Some(parent_query_graph.clone());
|
|
|
|
|
|
|
|
let positions_to_remove = match self.terms_matching_strategy {
|
|
|
|
TermsMatchingStrategy::Last => {
|
|
|
|
let mut all_positions = BTreeSet::new();
|
2023-03-14 23:37:47 +08:00
|
|
|
for (_, n) in parent_query_graph.nodes.iter() {
|
|
|
|
match &n.data {
|
|
|
|
QueryNodeData::Term(term) => {
|
2023-03-23 16:15:57 +08:00
|
|
|
all_positions.extend(term.positions.clone());
|
2023-02-21 16:49:25 +08:00
|
|
|
}
|
2023-03-14 23:37:47 +08:00
|
|
|
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => {}
|
2023-02-21 16:49:25 +08:00
|
|
|
}
|
|
|
|
}
|
2023-03-06 15:35:01 +08:00
|
|
|
let mut r: Vec<i8> = all_positions.into_iter().collect();
|
|
|
|
// don't remove the first term
|
|
|
|
r.remove(0);
|
|
|
|
r
|
2023-02-21 16:49:25 +08:00
|
|
|
}
|
|
|
|
TermsMatchingStrategy::All => vec![],
|
|
|
|
};
|
|
|
|
self.positions_to_remove = positions_to_remove;
|
|
|
|
self.iterating = true;
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn next_bucket(
|
|
|
|
&mut self,
|
2023-03-13 21:03:48 +08:00
|
|
|
ctx: &mut SearchContext<'ctx>,
|
2023-02-22 22:34:37 +08:00
|
|
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
2023-02-21 16:49:25 +08:00
|
|
|
universe: &RoaringBitmap,
|
|
|
|
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
|
|
|
assert!(self.iterating);
|
|
|
|
assert!(universe.len() > 1);
|
2023-02-23 20:13:19 +08:00
|
|
|
|
2023-02-21 16:49:25 +08:00
|
|
|
if self.exhausted {
|
|
|
|
return Ok(None);
|
|
|
|
}
|
|
|
|
let Some(query_graph) = &mut self.query_graph else { panic!() };
|
2023-02-21 20:57:34 +08:00
|
|
|
|
2023-02-23 20:13:19 +08:00
|
|
|
logger.log_words_state(query_graph);
|
|
|
|
|
2023-03-30 17:10:38 +08:00
|
|
|
let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?;
|
2023-02-21 20:57:34 +08:00
|
|
|
|
2023-02-21 16:49:25 +08:00
|
|
|
let child_query_graph = query_graph.clone();
|
2023-03-03 04:27:57 +08:00
|
|
|
loop {
|
|
|
|
if self.positions_to_remove.is_empty() {
|
|
|
|
self.exhausted = true;
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
let position_to_remove = self.positions_to_remove.pop().unwrap();
|
2023-03-08 20:26:29 +08:00
|
|
|
let did_delete_any_node =
|
|
|
|
query_graph.remove_words_starting_at_position(position_to_remove);
|
2023-03-03 04:27:57 +08:00
|
|
|
if did_delete_any_node {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2023-02-21 16:49:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
|
|
|
|
}
|
|
|
|
|
|
|
|
fn end_iteration(
|
|
|
|
&mut self,
|
2023-03-13 21:03:48 +08:00
|
|
|
_ctx: &mut SearchContext<'ctx>,
|
2023-02-22 22:34:37 +08:00
|
|
|
_logger: &mut dyn SearchLogger<QueryGraph>,
|
2023-02-21 16:49:25 +08:00
|
|
|
) {
|
|
|
|
self.iterating = false;
|
|
|
|
self.exhausted = true;
|
|
|
|
self.positions_to_remove = vec![];
|
2023-03-06 15:35:01 +08:00
|
|
|
self.query_graph = None;
|
2023-02-21 16:49:25 +08:00
|
|
|
}
|
|
|
|
}
|