From 23bf572dea9dfbac3bf5d0bdd4672851c4dd8c4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?=
Date: Tue, 21 Feb 2023 09:47:23 +0100
Subject: [PATCH] Introduce cache structures used with ranking rule graphs

---
 .../ranking_rule_graph/edge_docids_cache.rs | 74 +++++++++++++++++++
 .../ranking_rule_graph/empty_paths_cache.rs | 24 ++++++
 2 files changed, 98 insertions(+)
 create mode 100644 milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs
 create mode 100644 milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs

diff --git a/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs b/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs
new file mode 100644
index 000000000..301810847
--- /dev/null
+++ b/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs
@@ -0,0 +1,74 @@
+//! Cache of the document ids computed for the edges of a ranking rule graph.
+
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::marker::PhantomData;
+
+use heed::RoTxn;
+use roaring::RoaringBitmap;
+
+use super::{EdgeDetails, EdgeIndex, RankingRuleGraph, RankingRuleGraphTrait};
+use crate::new::db_cache::DatabaseCache;
+use crate::new::BitmapOrAllRef;
+use crate::{Index, Result};
+
+/// Memoizes the docids that `G::compute_docids` returns for the edges of a graph.
+pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> {
+    /// Docids already computed, keyed by edge. `get_edge_docids` never stores
+    /// unconditional edges here.
+    pub cache: HashMap<EdgeIndex, RoaringBitmap>,
+
+    // TODO: there is a big difference between `cache`, which is always valid, and a
+    // would-be `empty_path_prefixes` set, which would only be accurate for a
+    // particular universe. We should also have a universe-specific `empty_edge`.
+    _phantom: PhantomData<G>,
+}
+
+// Written by hand because `#[derive(Default)]` would also require `G: Default`.
+impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
+    fn default() -> Self {
+        Self { cache: HashMap::default(), _phantom: PhantomData }
+    }
+}
+
+impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
+    /// Returns the docids of the edge at `edge_index` in `graph`.
+    ///
+    /// A cached bitmap is returned as-is. Otherwise an unconditional edge yields
+    /// [`BitmapOrAllRef::All`] without touching the cache, and any other edge has
+    /// its docids computed by `G::compute_docids`, cached, then returned.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned by `G::compute_docids`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `graph.get_edge` has no edge to give for `edge_index`.
+    pub fn get_edge_docids<'s, 'transaction>(
+        &'s mut self,
+        index: &Index,
+        txn: &'transaction RoTxn,
+        db_cache: &mut DatabaseCache<'transaction>,
+        edge_index: &EdgeIndex,
+        graph: &RankingRuleGraph<G>,
+    ) -> Result<BitmapOrAllRef<'s>> {
+        // One hash lookup through the entry API instead of `contains_key`, `insert`
+        // and indexing, which each hashed the key again.
+        let slot = match self.cache.entry(*edge_index) {
+            Entry::Occupied(cached) => return Ok(BitmapOrAllRef::Bitmap(cached.into_mut())),
+            Entry::Vacant(slot) => slot,
+        };
+        let edge = graph
+            .get_edge(*edge_index)
+            .as_ref()
+            .expect("`edge_index` must refer to an edge present in the graph");
+
+        match &edge.details {
+            EdgeDetails::Unconditional => Ok(BitmapOrAllRef::All),
+            EdgeDetails::Data(details) => {
+                let docids = G::compute_docids(index, txn, db_cache, details)?;
+                Ok(BitmapOrAllRef::Bitmap(slot.insert(docids)))
+            }
+        }
+    }
+}
diff --git a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs
new file mode 100644
index 000000000..989a08a0d
--- /dev/null
+++ b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs
@@ -0,0 +1,24 @@
+//! Cache of the edges and path prefixes flagged as empty in a ranking rule graph.
+
+use std::collections::HashSet;
+
+use super::{paths_map::PathsMap, EdgeIndex};
+
+/// Records the edges and the path prefixes that were flagged as empty.
+#[derive(Default)]
+pub struct EmptyPathsCache {
+    /// Edges flagged as empty; a path containing any of them is reported empty.
+    pub empty_edges: HashSet<EdgeIndex>,
+    /// Path prefixes flagged as empty, queried with `contains_prefix_of_path`.
+    pub empty_prefixes: PathsMap<()>,
+}
+
+impl EmptyPathsCache {
+    /// Returns `true` if `path` goes through an edge of `empty_edges`, or if
+    /// `empty_prefixes.contains_prefix_of_path(path)` holds.
+    pub fn path_is_empty(&self, path: &[EdgeIndex]) -> bool {
+        // TODO: should be a bitmap intersection
+        path.iter().any(|edge| self.empty_edges.contains(edge))
+            || self.empty_prefixes.contains_prefix_of_path(path)
+    }
+}