From 7b1d8f4c6d78284cb3b68fa04d3369616f219c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 16 Mar 2023 09:58:59 +0100 Subject: [PATCH] Make PathSet strongly typed --- .../search/new/graph_based_ranking_rule.rs | 8 ++-- milli/src/search/new/interner.rs | 42 +++++++------------ milli/src/search/new/logger/detailed.rs | 14 +++---- milli/src/search/new/logger/mod.rs | 10 ++--- milli/src/search/new/query_graph.rs | 14 +++---- .../new/ranking_rule_graph/cheapest_paths.rs | 26 +++++++----- .../ranking_rule_graph/empty_paths_cache.rs | 12 +++--- .../src/search/new/ranking_rule_graph/mod.rs | 2 +- .../search/new/ranking_rule_graph/path_set.rs | 40 ++++++++++++++---- .../new/ranking_rule_graph/proximity/mod.rs | 2 +- .../search/new/ranking_rule_graph/typo/mod.rs | 2 +- milli/src/search/new/small_bitmap.rs | 17 ++++---- 12 files changed, 102 insertions(+), 87 deletions(-) diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index 5f270de6a..cfa0f059c 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -50,7 +50,6 @@ use super::ranking_rule_graph::{ }; use super::small_bitmap::SmallBitmap; use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; -use crate::search::new::interner::Interned; use crate::search::new::query_graph::QueryNodeData; use crate::Result; @@ -247,9 +246,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase let mut cached_edge_docids = vec![]; // graph.conditions_interner.map(|_| RoaringBitmap::new()); - for &condition_interned_raw in path { - let condition = Interned::new(condition_interned_raw); - visited_conditions.push(condition_interned_raw); + for &condition in path { + visited_conditions.push(condition); let edge_docids = edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?; @@ -295,7 +293,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase } assert!(!path_docids.is_empty()); for condition in path { - used_conditions.insert(Interned::new(*condition)); + used_conditions.insert(*condition); } bucket |= &path_docids; // Reduce the size of the universe so that we can more optimistically discard candidate paths diff --git a/milli/src/search/new/interner.rs b/milli/src/search/new/interner.rs index da8473e92..f4dc4a870 100644 --- a/milli/src/search/new/interner.rs +++ b/milli/src/search/new/interner.rs @@ -4,38 +4,27 @@ use std::marker::PhantomData; use fxhash::FxHashMap; -/// An index within a [`Interner`] structure. +/// An index within an interner ([`FixedSizeInterner`], [`DedupInterner`], or [`MappedInterner`]). pub struct Interned { idx: u16, _phantom: PhantomData, } impl Interned { - pub fn new(idx: u16) -> Self { + /// Create an interned value manually from its raw index within the interner. + pub fn from_raw(idx: u16) -> Self { Self { idx, _phantom: PhantomData } } - pub fn into_inner(self) -> u16 { + /// Get the raw index from the interned value + pub fn into_raw(self) -> u16 { self.idx } } -// TODO: the stable store should be replaced by a bump allocator -// and the interned value should be a pointer wrapper -// then we can get its value with `interned.get()` instead of `interner.get(interned)` -// and as a bonus, its validity is tracked with Rust's lifetime system -// one problem is that we need two lifetimes: one for the bump allocator, one for the -// hashmap -// but that's okay, we can use: -// ``` -// struct Interner<'bump> { -// bump: &'bump Bump, -// lookup: FxHashMap -// } -// ``` - -/// An [`Interner`] is used to store a unique copy of a value of type `T`. This value +/// A [`DedupInterner`] is used to store a unique copy of a value of type `T`. This value /// is then identified by a lightweight index of type [`Interned`], which can /// be copied, compared, and hashed efficiently. An immutable reference to the original value -/// can be retrieved using `self.get(interned)`. +/// can be retrieved using `self.get(interned)`. A set of values within the interner can be +/// efficiently managed using [`SmallBitmap`](super::small_bitmap::SmallBitmap). #[derive(Clone)] pub struct DedupInterner { stable_store: Vec, @@ -47,6 +36,7 @@ impl Default for DedupInterner { } } impl DedupInterner { + /// pub fn freeze(self) -> FixedSizeInterner { FixedSizeInterner { stable_store: self.stable_store } } @@ -62,7 +52,7 @@ where } else { assert!(self.stable_store.len() < u16::MAX as usize); self.stable_store.push(s.clone()); - let interned = Interned::new(self.stable_store.len() as u16 - 1); + let interned = Interned::from_raw(self.stable_store.len() as u16 - 1); self.lookup.insert(s, interned); interned } @@ -87,7 +77,7 @@ impl Interner { pub fn push(&mut self, s: T) -> Interned { assert!(self.stable_store.len() < u16::MAX as usize); self.stable_store.push(s); - Interned::new(self.stable_store.len() as u16 - 1) + Interned::from_raw(self.stable_store.len() as u16 - 1) } } @@ -123,13 +113,13 @@ impl FixedSizeInterner { } } pub fn indexes(&self) -> impl Iterator> { - (0..self.stable_store.len()).map(|i| Interned::new(i as u16)) + (0..self.stable_store.len()).map(|i| Interned::from_raw(i as u16)) } pub fn iter(&self) -> impl Iterator, &T)> { - self.stable_store.iter().enumerate().map(|(i, x)| (Interned::new(i as u16), x)) + self.stable_store.iter().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x)) } pub fn iter_mut(&mut self) -> impl Iterator, &mut T)> { - self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::new(i as u16), x)) + self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x)) } } #[derive(Clone)] @@ -152,10 +142,10 @@ impl MappedInterner { } } pub fn iter(&self) -> impl Iterator, &T)> { - self.stable_store.iter().enumerate().map(|(i, x)| (Interned::new(i as u16), x)) + self.stable_store.iter().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x)) } pub fn iter_mut(&mut self) -> impl Iterator, &mut T)> { - self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::new(i as u16), x)) + self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x)) } } // Interned boilerplate implementations diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs index 6b62c63b5..752e5ce35 100644 --- a/milli/src/search/new/logger/detailed.rs +++ b/milli/src/search/new/logger/detailed.rs @@ -43,7 +43,7 @@ pub enum SearchEvents { }, ProximityState { graph: RankingRuleGraph, - paths: Vec>, + paths: Vec>>, empty_paths_cache: DeadEndPathCache, universe: RoaringBitmap, distances: MappedInterner)>, QueryNode>, @@ -51,7 +51,7 @@ pub enum SearchEvents { }, TypoState { graph: RankingRuleGraph, - paths: Vec>, + paths: Vec>>, empty_paths_cache: DeadEndPathCache, universe: RoaringBitmap, distances: MappedInterner)>, QueryNode>, @@ -169,7 +169,7 @@ impl SearchLogger for DetailedSearchLogger { fn log_proximity_state( &mut self, query_graph: &RankingRuleGraph, - paths_map: &[Vec], + paths_map: &[Vec>], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, distances: &MappedInterner)>, QueryNode>, @@ -188,7 +188,7 @@ impl SearchLogger for DetailedSearchLogger { fn log_typo_state( &mut self, query_graph: &RankingRuleGraph, - paths_map: &[Vec], + paths_map: &[Vec>], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, distances: &MappedInterner)>, QueryNode>, @@ -527,7 +527,7 @@ shape: class" fn ranking_rule_graph_d2_description( ctx: &mut SearchContext, graph: &RankingRuleGraph, - paths: &[Vec], + paths: &[Vec>], dead_end_paths_cache: &DeadEndPathCache, distances: MappedInterner)>, QueryNode>, file: &mut File, @@ -613,13 +613,13 @@ shape: class fn paths_d2_description( ctx: &mut SearchContext, graph: &RankingRuleGraph, - paths: &[Vec], + paths: &[Vec>], file: &mut File, ) { for (path_idx, condition_indexes) in paths.iter().enumerate() { writeln!(file, "{path_idx} {{").unwrap(); for condition in condition_indexes.iter() { - Self::condition_d2_description(ctx, graph, Interned::new(*condition), file); + Self::condition_d2_description(ctx, graph, *condition, file); } for couple_edges in condition_indexes.windows(2) { let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() }; diff --git a/milli/src/search/new/logger/mod.rs b/milli/src/search/new/logger/mod.rs index c2e9bca80..e6c3931ed 100644 --- a/milli/src/search/new/logger/mod.rs +++ b/milli/src/search/new/logger/mod.rs @@ -3,7 +3,7 @@ pub mod detailed; use roaring::RoaringBitmap; -use super::interner::MappedInterner; +use super::interner::{Interned, MappedInterner}; use super::query_graph::QueryNode; use super::ranking_rule_graph::{ DeadEndPathCache, ProximityCondition, ProximityGraph, RankingRuleGraph, TypoEdge, TypoGraph, @@ -65,7 +65,7 @@ pub trait SearchLogger { fn log_proximity_state( &mut self, query_graph: &RankingRuleGraph, - paths: &[Vec], + paths: &[Vec>], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, distances: &MappedInterner)>, QueryNode>, @@ -76,7 +76,7 @@ pub trait SearchLogger { fn log_typo_state( &mut self, query_graph: &RankingRuleGraph, - paths: &[Vec], + paths: &[Vec>], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, distances: &MappedInterner)>, QueryNode>, @@ -136,7 +136,7 @@ impl SearchLogger for DefaultSearchLogger { fn log_proximity_state( &mut self, _query_graph: &RankingRuleGraph, - _paths_map: &[Vec], + _paths_map: &[Vec>], _empty_paths_cache: &DeadEndPathCache, _universe: &RoaringBitmap, _distances: &MappedInterner)>, QueryNode>, @@ -147,7 +147,7 @@ impl SearchLogger for DefaultSearchLogger { fn log_typo_state( &mut self, _query_graph: &RankingRuleGraph, - _paths: &[Vec], + _paths: &[Vec>], _empty_paths_cache: &DeadEndPathCache, _universe: &RoaringBitmap, _distances: &MappedInterner)>, QueryNode>, diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs index 1012030be..863ec0045 100644 --- a/milli/src/search/new/query_graph.rs +++ b/milli/src/search/new/query_graph.rs @@ -181,8 +181,8 @@ impl QueryGraph { (prev0, prev1, prev2) = (new_nodes, prev0, prev1); } - let root_node = Interned::new(root_node); - let end_node = Interned::new(end_node); + let root_node = Interned::from_raw(root_node); + let end_node = Interned::from_raw(end_node); let mut nodes = FixedSizeInterner::new( nodes_data.len() as u16, QueryNode { @@ -197,22 +197,22 @@ impl QueryGraph { .zip(successors.into_iter()) .enumerate() { - let node = nodes.get_mut(Interned::new(node_idx as u16)); + let node = nodes.get_mut(Interned::from_raw(node_idx as u16)); node.data = node_data; for x in predecessors { - node.predecessors.insert(Interned::new(x)); + node.predecessors.insert(Interned::from_raw(x)); } for x in successors { - node.successors.insert(Interned::new(x)); + node.successors.insert(Interned::from_raw(x)); } } let mut graph = QueryGraph { root_node, end_node, nodes }; graph.connect_to_node( - prev0.into_iter().map(Interned::new).collect::>().as_slice(), + prev0.into_iter().map(Interned::from_raw).collect::>().as_slice(), end_node, ); - let empty_nodes = empty_nodes.into_iter().map(Interned::new).collect::>(); + let empty_nodes = empty_nodes.into_iter().map(Interned::from_raw).collect::>(); graph.remove_nodes_keep_edges(&empty_nodes); Ok(graph) diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index c0697091e..52f199ca9 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -24,7 +24,11 @@ impl RankingRuleGraph { cost: u16, all_distances: &MappedInterner)>, QueryNode>, empty_paths_cache: &mut DeadEndPathCache, - mut visit: impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache) -> Result>, + mut visit: impl FnMut( + &[Interned], + &mut Self, + &mut DeadEndPathCache, + ) -> Result>, ) -> Result<()> { let _ = self.visit_paths_of_cost_rec( from, @@ -44,8 +48,12 @@ impl RankingRuleGraph { cost: u16, all_distances: &MappedInterner)>, QueryNode>, empty_paths_cache: &mut DeadEndPathCache, - visit: &mut impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache) -> Result>, - prev_conditions: &mut Vec, + visit: &mut impl FnMut( + &[Interned], + &mut Self, + &mut DeadEndPathCache, + ) -> Result>, + prev_conditions: &mut Vec>, cur_path: &mut SmallBitmap, forbidden_conditions: &mut SmallBitmap, ) -> Result { @@ -92,8 +100,7 @@ impl RankingRuleGraph { continue; } cur_path.insert(condition); - // TODO: typed path set - prev_conditions.push(condition.into_inner()); + prev_conditions.push(condition); let mut new_forbidden_conditions = forbidden_conditions.clone(); new_forbidden_conditions @@ -101,7 +108,7 @@ impl RankingRuleGraph { empty_paths_cache.prefixes.final_edges_after_prefix( prev_conditions, &mut |x| { - new_forbidden_conditions.insert(Interned::new(x)); + new_forbidden_conditions.insert(x); }, ); let next_any_valid = if edge.dest_node == self.query_graph.end_node { @@ -137,12 +144,11 @@ impl RankingRuleGraph { } forbidden_conditions.union(&empty_paths_cache.conditions); for prev_condition in prev_conditions.iter() { - forbidden_conditions.union( - empty_paths_cache.condition_couples.get(Interned::new(*prev_condition)), - ); + forbidden_conditions + .union(empty_paths_cache.condition_couples.get(*prev_condition)); } empty_paths_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| { - forbidden_conditions.insert(Interned::new(x)); + forbidden_conditions.insert(x); }); } } diff --git a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs index 3b518bc9b..15346e929 100644 --- a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs @@ -11,7 +11,7 @@ pub struct DeadEndPathCache { /// The set of edge conditions that resolve to no documents. pub conditions: SmallBitmap, /// A set of path prefixes that resolve to no documents. - pub prefixes: PathSet, + pub prefixes: PathSet, /// A set of empty couples of edge conditions that resolve to no documents. pub condition_couples: MappedInterner, G::EdgeCondition>, } @@ -40,13 +40,13 @@ impl DeadEndPathCache { pub fn add_condition(&mut self, condition: Interned) { self.conditions.insert(condition); self.condition_couples.get_mut(condition).clear(); - self.prefixes.remove_edge(condition.into_inner()); // TODO: typed PathSet + self.prefixes.remove_edge(condition); for (_, edges2) in self.condition_couples.iter_mut() { edges2.remove(condition); } } /// Store in the cache that every path containing the given prefix resolves to no documents. - pub fn add_prefix(&mut self, prefix: &[u16]) { + pub fn add_prefix(&mut self, prefix: &[Interned]) { // TODO: typed PathSet self.prefixes.insert(prefix.iter().copied()); } @@ -63,15 +63,15 @@ impl DeadEndPathCache { /// Returns true if the cache can determine that the given path resolves to no documents. pub fn path_is_dead_end( &self, - path: &[u16], + path: &[Interned], path_bitmap: &SmallBitmap, ) -> bool { if path_bitmap.intersects(&self.conditions) { return true; } - for edge in path.iter() { + for condition in path.iter() { // TODO: typed path - let forbidden_other_edges = self.condition_couples.get(Interned::new(*edge)); + let forbidden_other_edges = self.condition_couples.get(*condition); if path_bitmap.intersects(forbidden_other_edges) { return true; } diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index 851aeae54..03e7c14bc 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -117,7 +117,7 @@ pub trait RankingRuleGraphTrait: Sized { fn log_state( graph: &RankingRuleGraph, - paths: &[Vec], + paths: &[Vec>], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, distances: &MappedInterner)>, QueryNode>, diff --git a/milli/src/search/new/ranking_rule_graph/path_set.rs b/milli/src/search/new/ranking_rule_graph/path_set.rs index d5bab6c14..04e6c9116 100644 --- a/milli/src/search/new/ranking_rule_graph/path_set.rs +++ b/milli/src/search/new/ranking_rule_graph/path_set.rs @@ -2,14 +2,34 @@ // For the empty_prefixes field in the EmptyPathsCache only :/ // but it could be used for more, like efficient computing of a set of paths +use crate::search::new::interner::Interned; + /// A set of [`Path`] -#[derive(Default, Debug, Clone)] -pub struct PathSet { - nodes: Vec<(u16, PathSet)>, +pub struct PathSet { + nodes: Vec<(Interned, Self)>, is_end: bool, } -impl PathSet { - pub fn insert(&mut self, mut edges: impl Iterator) { + +impl Clone for PathSet { + fn clone(&self) -> Self { + Self { nodes: self.nodes.clone(), is_end: self.is_end } + } +} + +impl std::fmt::Debug for PathSet { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PathSet").field("nodes", &self.nodes).field("is_end", &self.is_end).finish() + } +} + +impl Default for PathSet { + fn default() -> Self { + Self { nodes: Default::default(), is_end: Default::default() } + } +} + +impl PathSet { + pub fn insert(&mut self, mut edges: impl Iterator>) { match edges.next() { None => { self.is_end = true; @@ -27,7 +47,7 @@ impl PathSet { } } - pub fn remove_edge(&mut self, forbidden_edge: u16) { + pub fn remove_edge(&mut self, forbidden_edge: Interned) { let mut i = 0; while i < self.nodes.len() { let should_remove = if self.nodes[i].0 == forbidden_edge { @@ -46,7 +66,11 @@ impl PathSet { } } - pub fn final_edges_after_prefix(&self, prefix: &[u16], visit: &mut impl FnMut(u16)) { + pub fn final_edges_after_prefix( + &self, + prefix: &[Interned], + visit: &mut impl FnMut(Interned), + ) { let [first_edge, remaining_prefix @ ..] = prefix else { for node in self.nodes.iter() { if node.1.is_end { @@ -62,7 +86,7 @@ impl PathSet { } } - pub fn contains_prefix_of_path(&self, path: &[u16]) -> bool { + pub fn contains_prefix_of_path(&self, path: &[Interned]) -> bool { if self.is_end { return true; } diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs index 8fd8190f8..5a5c5cff8 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs @@ -66,7 +66,7 @@ impl RankingRuleGraphTrait for ProximityGraph { fn log_state( graph: &RankingRuleGraph, - paths: &[Vec], + paths: &[Vec>], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, distances: &MappedInterner)>, QueryNode>, diff --git a/milli/src/search/new/ranking_rule_graph/typo/mod.rs b/milli/src/search/new/ranking_rule_graph/typo/mod.rs index abfea6499..d0b18ddc8 100644 --- a/milli/src/search/new/ranking_rule_graph/typo/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/typo/mod.rs @@ -137,7 +137,7 @@ impl RankingRuleGraphTrait for TypoGraph { fn log_state( graph: &RankingRuleGraph, - paths: &[Vec], + paths: &[Vec>], empty_paths_cache: &DeadEndPathCache, universe: &RoaringBitmap, distances: &MappedInterner)>, QueryNode>, diff --git a/milli/src/search/new/small_bitmap.rs b/milli/src/search/new/small_bitmap.rs index 7ab2b61ae..cb1c64ec2 100644 --- a/milli/src/search/new/small_bitmap.rs +++ b/milli/src/search/new/small_bitmap.rs @@ -32,10 +32,7 @@ impl SmallBitmap { for_interner: &FixedSizeInterner, ) -> Self { Self { - internal: SmallBitmapInternal::from_iter( - xs.map(|x| x.into_inner()), - for_interner.len(), - ), + internal: SmallBitmapInternal::from_iter(xs.map(|x| x.into_raw()), for_interner.len()), _phantom: PhantomData, } } @@ -46,13 +43,13 @@ impl SmallBitmap { self.internal.clear() } pub fn contains(&self, x: Interned) -> bool { - self.internal.contains(x.into_inner()) + self.internal.contains(x.into_raw()) } pub fn insert(&mut self, x: Interned) { - self.internal.insert(x.into_inner()) + self.internal.insert(x.into_raw()) } pub fn remove(&mut self, x: Interned) { - self.internal.remove(x.into_inner()) + self.internal.remove(x.into_raw()) } pub fn intersection(&mut self, other: &Self) { @@ -71,7 +68,7 @@ impl SmallBitmap { self.internal.intersects(&other.internal) } pub fn iter(&self) -> impl Iterator> + '_ { - self.internal.iter().map(|x| Interned::new(x)) + self.internal.iter().map(|x| Interned::from_raw(x)) } } #[derive(Clone)] @@ -80,14 +77,14 @@ pub enum SmallBitmapInternal { Small(Box<[u64]>), } impl SmallBitmapInternal { - pub fn new(universe_length: u16) -> Self { + fn new(universe_length: u16) -> Self { if universe_length <= 64 { Self::Tiny(0) } else { Self::Small(vec![0; 1 + universe_length as usize / 64].into_boxed_slice()) } } - pub fn from_iter(xs: impl Iterator, universe_length: u16) -> Self { + fn from_iter(xs: impl Iterator, universe_length: u16) -> Self { let mut s = Self::new(universe_length); for x in xs { s.insert(x);