meilisearch/milli/src/search/new/words.rs

use std::collections::BTreeSet;

use heed::RoTxn;
use roaring::RoaringBitmap;

use super::db_cache::DatabaseCache;
use super::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput};
use crate::{Index, Result, TermsMatchingStrategy};

/// State of the `words` ranking rule.
///
/// The rule keeps a private copy of the query graph and, between buckets,
/// removes the words located at one query position at a time.
pub struct Words {
    /// `true` when no further bucket can be produced. Set by `new` and
    /// `end_iteration`, cleared by `start_iteration`, and set again by
    /// `next_bucket` once `positions_to_remove` has run out.
    exhausted: bool,
    /// Working copy of the parent query graph; `None` until the first
    /// `start_iteration`. `next_bucket` mutates it by removing words.
    query_graph: Option<QueryGraph>,
    /// `true` between `start_iteration` and `end_iteration`; `next_bucket`
    /// asserts on it.
    iterating: bool,
    /// Positions still to be removed from `query_graph`, in ascending order.
    /// `next_bucket` pops from the end, so the highest position goes first.
    positions_to_remove: Vec<i8>,
    /// Decides which positions are scheduled for removal: every term position
    /// for `Last`, none for `All`.
    terms_matching_strategy: TermsMatchingStrategy,
    /// Passed to `resolve_query_graph` on every bucket and never reset by this
    /// type. NOTE(review): presumably caches per-node document ids across
    /// buckets; confirm against `resolve_query_graph`.
    node_docids_cache: NodeDocIdsCache,
}
impl Words {
    /// Builds a `Words` rule that is not iterating yet.
    ///
    /// The returned value starts out exhausted, with no query graph and no
    /// positions scheduled for removal; `start_iteration` must be called
    /// before any bucket is requested.
    pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
        Self {
            terms_matching_strategy,
            query_graph: None,
            positions_to_remove: Vec::new(),
            node_docids_cache: Default::default(),
            iterating: false,
            exhausted: true,
        }
    }
}

/// `Words` as a ranking rule over [`QueryGraph`]s.
///
/// The first bucket is resolved from the full query graph. Each following
/// bucket is resolved after the words at one more position have been removed,
/// starting with the highest position.
impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
    /// Starts a new iteration over `parent_query_graph`.
    ///
    /// Stores a clone of the graph and computes the positions that
    /// `next_bucket` will remove:
    /// - `TermsMatchingStrategy::Last`: every position covered by a
    ///   `QueryNode::Term`, deduplicated and sorted in ascending order;
    /// - `TermsMatchingStrategy::All`: no position, so a single bucket is
    ///   produced.
    ///
    /// The index, transaction, database cache and parent candidates are not
    /// used by this rule.
    fn start_iteration(
        &mut self,
        _index: &Index,
        _txn: &'transaction RoTxn,
        _db_cache: &mut DatabaseCache<'transaction>,
        _parent_candidates: &RoaringBitmap,
        parent_query_graph: &QueryGraph,
    ) -> Result<()> {
        self.exhausted = false;
        self.query_graph = Some(parent_query_graph.clone());

        // TODO: a phrase can contain many positions, but represents a single node.
        // That's a problem.
        self.positions_to_remove = match self.terms_matching_strategy {
            TermsMatchingStrategy::Last => {
                // The `BTreeSet` both deduplicates the positions and yields them
                // in ascending order, so popping from the resulting `Vec` removes
                // the highest position first.
                let mut all_positions = BTreeSet::new();
                for node in parent_query_graph.nodes.iter() {
                    match node {
                        QueryNode::Term(term) => {
                            all_positions.extend(term.positions.clone());
                        }
                        QueryNode::Deleted | QueryNode::Start | QueryNode::End => {}
                    }
                }
                all_positions.into_iter().collect()
            }
            TermsMatchingStrategy::All => Vec::new(),
        };
        self.iterating = true;
        Ok(())
    }

    /// Produces the next bucket, or `Ok(None)` once the rule is exhausted.
    ///
    /// The bucket's candidates are whatever `resolve_query_graph` returns for
    /// the current working graph and `universe`; its query is a snapshot of
    /// that graph taken before it is modified. Afterwards the words at the
    /// next scheduled position are removed from the working graph, or the
    /// rule is marked exhausted when no position is left.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `resolve_query_graph`.
    ///
    /// # Panics
    ///
    /// Panics if called outside of an iteration, or if `universe` holds fewer
    /// than two documents.
    /// NOTE(review): the second check presumably mirrors a guarantee made by
    /// the code driving the ranking rules; confirm against the caller.
    fn next_bucket(
        &mut self,
        index: &Index,
        txn: &'transaction RoTxn,
        db_cache: &mut DatabaseCache<'transaction>,
        universe: &RoaringBitmap,
    ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
        assert!(self.iterating);
        assert!(universe.len() > 1);
        if self.exhausted {
            return Ok(None);
        }
        let Some(query_graph) = &mut self.query_graph else {
            panic!("Words::next_bucket: no query graph, start_iteration must be called first")
        };

        let this_bucket = resolve_query_graph(
            index,
            txn,
            db_cache,
            &mut self.node_docids_cache,
            query_graph,
            universe,
        )?;

        // Snapshot the graph that produced this bucket before shrinking it.
        let child_query_graph = query_graph.clone();
        // TODO: Check whether a position exists in the graph before removing it and
        // returning the next bucket.
        // while graph.does_not_contain(positions_to_remove.last()) { positions_to_remove.pop() }
        if let Some(position_to_remove) = self.positions_to_remove.pop() {
            query_graph.remove_words_at_position(position_to_remove);
        } else {
            // Nothing left to remove: the bucket returned below is the last one.
            self.exhausted = true;
        }

        Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
    }

    /// Ends the current iteration.
    ///
    /// Marks the rule as exhausted and not iterating, and discards the
    /// positions that were still scheduled for removal. The stored query graph
    /// and the node docids cache are left as they are.
    fn end_iteration(
        &mut self,
        _index: &Index,
        _txn: &'transaction RoTxn,
        _db_cache: &mut DatabaseCache<'transaction>,
    ) {
        self.iterating = false;
        self.exhausted = true;
        self.positions_to_remove = Vec::new();
    }
}
Introduce the words ranking rule working with the new search structures 2023-02-21 16:49:25 +08:00			`use std::collections::BTreeSet;`

			`use heed::RoTxn;`
			`use roaring::RoaringBitmap;`

			`use super::db_cache::DatabaseCache;`
Use a cache when resolving the query graph 2023-02-21 20:21:41 +08:00			`use super::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};`
Introduce the words ranking rule working with the new search structures 2023-02-21 16:49:25 +08:00			`use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput};`
			`use crate::{Index, Result, TermsMatchingStrategy};`

			`pub struct Words {`
			`exhausted: bool,`
			`query_graph: Option<QueryGraph>,`
			`iterating: bool,`
			`positions_to_remove: Vec<i8>,`
			`terms_matching_strategy: TermsMatchingStrategy,`
Use a cache when resolving the query graph 2023-02-21 20:21:41 +08:00			`node_docids_cache: NodeDocIdsCache,`
Introduce the words ranking rule working with the new search structures 2023-02-21 16:49:25 +08:00			`}`
			`impl Words {`
			`pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {`
			`Self {`
			`exhausted: true,`
			`query_graph: None,`
			`iterating: false,`
			`positions_to_remove: vec![],`
			`terms_matching_strategy,`
Use a cache when resolving the query graph 2023-02-21 20:21:41 +08:00			`node_docids_cache: <_>::default(),`
Introduce the words ranking rule working with the new search structures 2023-02-21 16:49:25 +08:00			`}`
			`}`
			`}`

			`impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {`
			`fn start_iteration(`
			`&mut self,`
			`_index: &Index,`
			`_txn: &'transaction RoTxn,`
			`_db_cache: &mut DatabaseCache<'transaction>,`
			`parent_candidates: &RoaringBitmap,`
			`parent_query_graph: &QueryGraph,`
			`) -> Result<()> {`
			`// println!("Words: start iteration");`
			`self.exhausted = false;`
			`self.query_graph = Some(parent_query_graph.clone());`

			`// TODO: a phrase can contain many positions, but represents a single node.`
			`// That's a problem.`
			`let positions_to_remove = match self.terms_matching_strategy {`
			`TermsMatchingStrategy::Last => {`
			`let mut all_positions = BTreeSet::new();`
			`for n in parent_query_graph.nodes.iter() {`
			`match n {`
			`QueryNode::Term(term) => {`
			`all_positions.extend(term.positions.clone().into_iter());`
			`}`
			`QueryNode::Deleted \| QueryNode::Start \| QueryNode::End => {}`
			`}`
			`}`
			`all_positions.into_iter().collect()`
			`}`
			`TermsMatchingStrategy::All => vec![],`
			`};`
			`// println!("positions to remove: {positions_to_remove:?}");`
			`self.positions_to_remove = positions_to_remove;`
			`self.iterating = true;`
			`Ok(())`
			`}`

			`fn next_bucket(`
			`&mut self,`
			`index: &Index,`
			`txn: &'transaction RoTxn,`
			`db_cache: &mut DatabaseCache<'transaction>,`
			`universe: &RoaringBitmap,`
			`) -> Result<Option<RankingRuleOutput<QueryGraph>>> {`
			`// println!("Words: next bucket");`
			`assert!(self.iterating);`
			`assert!(universe.len() > 1);`
			`if self.exhausted {`
			`return Ok(None);`
			`}`
			`let Some(query_graph) = &mut self.query_graph else { panic!() };`
Remove noise in codebase 2023-02-21 20:57:34 +08:00
Use a cache when resolving the query graph 2023-02-21 20:21:41 +08:00			`let this_bucket = resolve_query_graph(`
			`index,`
			`txn,`
			`db_cache,`
			`&mut self.node_docids_cache,`
			`query_graph,`
			`universe,`
			`)?;`
Remove noise in codebase 2023-02-21 20:57:34 +08:00
Introduce the words ranking rule working with the new search structures 2023-02-21 16:49:25 +08:00			`let child_query_graph = query_graph.clone();`
			`// TODO: Check whether a position exists in the graph before removing it and`
			`// returning the next bucket.`
			`// while graph.does_not_contain(positions_to_remove.last()) { positions_to_remove.pop() }`
			`if self.positions_to_remove.is_empty() {`
			`self.exhausted = true;`
			`} else {`
			`let position_to_remove = self.positions_to_remove.pop().unwrap();`
			`query_graph.remove_words_at_position(position_to_remove);`
			`}`

			`Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))`
			`}`

			`fn end_iteration(`
			`&mut self,`
			`_index: &Index,`
			`_txn: &'transaction RoTxn,`
			`_db_cache: &mut DatabaseCache<'transaction>,`
			`) {`
			`// println!("Words: end iteration");`
			`self.iterating = false;`
			`self.exhausted = true;`
			`self.positions_to_remove = vec![];`
			`}`
			`}`