mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-31 15:31:53 +08:00
Refactor of the Interner
This commit is contained in:
parent
31628c5cd4
commit
e9cf58d584
@ -5,7 +5,7 @@ use fxhash::FxHashMap;
|
|||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use heed::{BytesEncode, Database, RoTxn};
|
use heed::{BytesEncode, Database, RoTxn};
|
||||||
|
|
||||||
use super::interner::{Interned, Interner};
|
use super::interner::{DedupInterner, Interned};
|
||||||
use crate::{Index, Result};
|
use crate::{Index, Result};
|
||||||
|
|
||||||
/// A cache storing pointers to values in the LMDB databases.
|
/// A cache storing pointers to values in the LMDB databases.
|
||||||
@ -53,7 +53,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
word_interner: &Interner<String>,
|
word_interner: &DedupInterner<String>,
|
||||||
word: Interned<String>,
|
word: Interned<String>,
|
||||||
) -> Result<Option<&'ctx [u8]>> {
|
) -> Result<Option<&'ctx [u8]>> {
|
||||||
Self::get_value(
|
Self::get_value(
|
||||||
@ -69,7 +69,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
word_interner: &Interner<String>,
|
word_interner: &DedupInterner<String>,
|
||||||
prefix: Interned<String>,
|
prefix: Interned<String>,
|
||||||
) -> Result<Option<&'ctx [u8]>> {
|
) -> Result<Option<&'ctx [u8]>> {
|
||||||
Self::get_value(
|
Self::get_value(
|
||||||
@ -85,7 +85,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
word_interner: &Interner<String>,
|
word_interner: &DedupInterner<String>,
|
||||||
word1: Interned<String>,
|
word1: Interned<String>,
|
||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
@ -103,7 +103,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
word_interner: &Interner<String>,
|
word_interner: &DedupInterner<String>,
|
||||||
word1: Interned<String>,
|
word1: Interned<String>,
|
||||||
prefix2: Interned<String>,
|
prefix2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
@ -120,7 +120,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
word_interner: &Interner<String>,
|
word_interner: &DedupInterner<String>,
|
||||||
left_prefix: Interned<String>,
|
left_prefix: Interned<String>,
|
||||||
right: Interned<String>,
|
right: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
|
@ -38,13 +38,16 @@ That is we find the documents where either:
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::interner::MappedInterner;
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
|
use super::query_graph::QueryNode;
|
||||||
use super::ranking_rule_graph::{
|
use super::ranking_rule_graph::{
|
||||||
EdgeCondition, EdgeConditionsCache, EmptyPathsCache, ProximityGraph, RankingRuleGraph,
|
DeadEndPathCache, EdgeCondition, EdgeConditionDocIdsCache, ProximityGraph, RankingRuleGraph,
|
||||||
RankingRuleGraphTrait, TypoGraph,
|
RankingRuleGraphTrait, TypoGraph,
|
||||||
};
|
};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||||
|
use crate::search::new::interner::Interned;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
|
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
|
||||||
@ -79,12 +82,12 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
/// The current graph
|
/// The current graph
|
||||||
graph: RankingRuleGraph<G>,
|
graph: RankingRuleGraph<G>,
|
||||||
/// Cache to retrieve the docids associated with each edge
|
/// Cache to retrieve the docids associated with each edge
|
||||||
edge_conditions_cache: EdgeConditionsCache<G>,
|
edge_conditions_cache: EdgeConditionDocIdsCache<G>,
|
||||||
/// Cache used to optimistically discard paths that resolve to no documents.
|
/// Cache used to optimistically discard paths that resolve to no documents.
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: DeadEndPathCache<G>,
|
||||||
/// A structure giving the list of possible costs from each node to the end node,
|
/// A structure giving the list of possible costs from each node to the end node,
|
||||||
/// along with a set of unavoidable edges that must be traversed to achieve that distance.
|
/// along with a set of unavoidable edges that must be traversed to achieve that distance.
|
||||||
all_distances: Vec<Vec<(u16, SmallBitmap)>>,
|
all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||||
/// An index in the first element of `all_distances`, giving the cost of the next bucket
|
/// An index in the first element of `all_distances`, giving the cost of the next bucket
|
||||||
cur_distance_idx: usize,
|
cur_distance_idx: usize,
|
||||||
}
|
}
|
||||||
@ -95,12 +98,12 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
|
fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
graph: &mut RankingRuleGraph<G>,
|
graph: &mut RankingRuleGraph<G>,
|
||||||
edge_docids_cache: &mut EdgeConditionsCache<G>,
|
edge_docids_cache: &mut EdgeConditionDocIdsCache<G>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
empty_paths_cache: &mut EmptyPathsCache,
|
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for edge_index in 0..graph.edges_store.len() as u16 {
|
for edge_id in graph.edges_store.indexes() {
|
||||||
let Some(edge) = graph.edges_store[edge_index as usize].as_ref() else {
|
let Some(edge) = graph.edges_store.get(edge_id).as_ref() else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let condition = edge.condition;
|
let condition = edge.condition;
|
||||||
@ -110,8 +113,8 @@ fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
|
|||||||
EdgeCondition::Conditional(condition) => {
|
EdgeCondition::Conditional(condition) => {
|
||||||
let docids = edge_docids_cache.get_edge_docids(ctx, condition, graph, universe)?;
|
let docids = edge_docids_cache.get_edge_docids(ctx, condition, graph, universe)?;
|
||||||
if docids.is_disjoint(universe) {
|
if docids.is_disjoint(universe) {
|
||||||
graph.remove_ranking_rule_edge(edge_index);
|
graph.remove_edges_with_condition(condition);
|
||||||
empty_paths_cache.forbid_edge(edge_index);
|
empty_paths_cache.add_condition(condition);
|
||||||
edge_docids_cache.cache.remove(&condition);
|
edge_docids_cache.cache.remove(&condition);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -133,8 +136,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
query_graph: &QueryGraph,
|
query_graph: &QueryGraph,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||||
let mut edge_docids_cache = EdgeConditionsCache::default();
|
let mut edge_docids_cache = EdgeConditionDocIdsCache::default();
|
||||||
let mut empty_paths_cache = EmptyPathsCache::new(graph.edges_store.len() as u16);
|
let mut empty_paths_cache = DeadEndPathCache::new(&graph.conditions_interner);
|
||||||
|
|
||||||
// First simplify the graph as much as possible, by computing the docids of the edges
|
// First simplify the graph as much as possible, by computing the docids of the edges
|
||||||
// within the rule's universe and removing the edges that have no associated docids.
|
// within the rule's universe and removing the edges that have no associated docids.
|
||||||
@ -187,7 +190,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
// If the cur_distance_idx does not point to a valid cost in the `all_distances`
|
// If the cur_distance_idx does not point to a valid cost in the `all_distances`
|
||||||
// structure, then we have computed all the buckets and can return.
|
// structure, then we have computed all the buckets and can return.
|
||||||
if state.cur_distance_idx
|
if state.cur_distance_idx
|
||||||
>= state.all_distances[state.graph.query_graph.root_node as usize].len()
|
>= state.all_distances.get(state.graph.query_graph.root_node).len()
|
||||||
{
|
{
|
||||||
self.state = None;
|
self.state = None;
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
@ -195,7 +198,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
|
|
||||||
// Retrieve the cost of the paths to compute
|
// Retrieve the cost of the paths to compute
|
||||||
let (cost, _) =
|
let (cost, _) =
|
||||||
state.all_distances[state.graph.query_graph.root_node as usize][state.cur_distance_idx];
|
state.all_distances.get(state.graph.query_graph.root_node)[state.cur_distance_idx];
|
||||||
state.cur_distance_idx += 1;
|
state.cur_distance_idx += 1;
|
||||||
|
|
||||||
let mut bucket = RoaringBitmap::new();
|
let mut bucket = RoaringBitmap::new();
|
||||||
@ -226,7 +229,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
// Updating the empty_paths_cache helps speed up the execution of `visit_paths_of_cost` and reduces
|
// Updating the empty_paths_cache helps speed up the execution of `visit_paths_of_cost` and reduces
|
||||||
// the number of future candidate paths given by that same function.
|
// the number of future candidate paths given by that same function.
|
||||||
graph.visit_paths_of_cost(
|
graph.visit_paths_of_cost(
|
||||||
graph.query_graph.root_node as usize,
|
graph.query_graph.root_node,
|
||||||
cost,
|
cost,
|
||||||
all_distances,
|
all_distances,
|
||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
@ -237,29 +240,27 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
|
|
||||||
// We store the edges and their docids in vectors in case the path turns out to be
|
// We store the edges and their docids in vectors in case the path turns out to be
|
||||||
// empty and we need to figure out why it was empty.
|
// empty and we need to figure out why it was empty.
|
||||||
let mut visited_edges = vec![];
|
let mut visited_conditions = vec![];
|
||||||
let mut cached_edge_docids = vec![];
|
let mut cached_edge_docids =
|
||||||
|
graph.conditions_interner.map(|_| RoaringBitmap::new());
|
||||||
|
|
||||||
for &edge_index in path {
|
for &condition_interned_raw in path {
|
||||||
visited_edges.push(edge_index);
|
let condition = Interned::new(condition_interned_raw);
|
||||||
let edge = graph.edges_store[edge_index as usize].as_ref().unwrap();
|
visited_conditions.push(condition_interned_raw);
|
||||||
let condition = match edge.condition {
|
|
||||||
EdgeCondition::Unconditional => continue,
|
|
||||||
EdgeCondition::Conditional(condition) => condition,
|
|
||||||
};
|
|
||||||
|
|
||||||
let edge_docids =
|
let edge_docids =
|
||||||
edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?;
|
edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?;
|
||||||
|
|
||||||
cached_edge_docids.push((edge_index, edge_docids.clone()));
|
*cached_edge_docids.get_mut(condition) = edge_docids.clone();
|
||||||
|
|
||||||
// If the edge is empty, then the path will be empty as well, we update the graph
|
// If the edge is empty, then the path will be empty as well, we update the graph
|
||||||
// and caches accordingly and skip to the next candidate path.
|
// and caches accordingly and skip to the next candidate path.
|
||||||
if edge_docids.is_disjoint(&universe) {
|
if edge_docids.is_disjoint(&universe) {
|
||||||
// 1. Store in the cache that this edge is empty for this universe
|
// 1. Store in the cache that this edge is empty for this universe
|
||||||
empty_paths_cache.forbid_edge(edge_index);
|
empty_paths_cache.add_condition(condition);
|
||||||
// 2. remove this edge from the ranking rule graph
|
// 2. remove this edge from the ranking rule graph
|
||||||
graph.remove_ranking_rule_edge(edge_index);
|
// ouch, no! :( need to link a condition to one or more ranking rule edges
|
||||||
|
graph.remove_edges_with_condition(condition);
|
||||||
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
||||||
edge_docids_cache.cache.remove(&condition);
|
edge_docids_cache.cache.remove(&condition);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
@ -270,17 +271,18 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
if path_docids.is_disjoint(&universe) {
|
if path_docids.is_disjoint(&universe) {
|
||||||
// First, we know that this path is empty, and thus any path
|
// First, we know that this path is empty, and thus any path
|
||||||
// that is a superset of it will also be empty.
|
// that is a superset of it will also be empty.
|
||||||
empty_paths_cache.forbid_prefix(&visited_edges);
|
empty_paths_cache.add_prefix(&visited_conditions);
|
||||||
// Second, if the intersection between this edge and any
|
// Second, if the intersection between this edge and any
|
||||||
// previous one is disjoint with the universe,
|
// previous one is disjoint with the universe,
|
||||||
// then we also know that any path containing the same couple of
|
// then we also know that any path containing the same couple of
|
||||||
// edges will also be empty.
|
// edges will also be empty.
|
||||||
for (edge_index2, edge_docids2) in
|
for (past_condition, edge_docids2) in cached_edge_docids.iter() {
|
||||||
cached_edge_docids[..cached_edge_docids.len() - 1].iter()
|
if past_condition == condition {
|
||||||
{
|
continue;
|
||||||
|
};
|
||||||
let intersection = edge_docids & edge_docids2;
|
let intersection = edge_docids & edge_docids2;
|
||||||
if intersection.is_disjoint(&universe) {
|
if intersection.is_disjoint(&universe) {
|
||||||
empty_paths_cache.forbid_couple_edges(*edge_index2, edge_index);
|
empty_paths_cache.add_condition_couple(past_condition, condition);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// We should maybe instead try to compute:
|
// We should maybe instead try to compute:
|
||||||
@ -291,6 +293,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
bucket |= &path_docids;
|
bucket |= &path_docids;
|
||||||
// Reduce the size of the universe so that we can more optimistically discard candidate paths
|
// Reduce the size of the universe so that we can more optimistically discard candidate paths
|
||||||
universe -= path_docids;
|
universe -= path_docids;
|
||||||
|
// TODO: if the universe is empty, stop iterating
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use std::fmt;
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
@ -5,14 +6,16 @@ use fxhash::FxHashMap;
|
|||||||
|
|
||||||
/// An index within a [`Interner<T>`] structure.
|
/// An index within a [`Interner<T>`] structure.
|
||||||
pub struct Interned<T> {
|
pub struct Interned<T> {
|
||||||
idx: u32,
|
idx: u16,
|
||||||
_phantom: PhantomData<T>,
|
_phantom: PhantomData<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> Interned<T> {
|
impl<T> Interned<T> {
|
||||||
fn new(idx: u32) -> Self {
|
pub fn new(idx: u16) -> Self {
|
||||||
Self { idx, _phantom: PhantomData }
|
Self { idx, _phantom: PhantomData }
|
||||||
}
|
}
|
||||||
|
pub fn into_inner(self) -> u16 {
|
||||||
|
self.idx
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: the stable store should be replaced by a bump allocator
|
// TODO: the stable store should be replaced by a bump allocator
|
||||||
@ -34,17 +37,22 @@ impl<T> Interned<T> {
|
|||||||
/// be copied, compared, and hashed efficiently. An immutable reference to the original value
|
/// be copied, compared, and hashed efficiently. An immutable reference to the original value
|
||||||
/// can be retrieved using `self.get(interned)`.
|
/// can be retrieved using `self.get(interned)`.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Interner<T> {
|
pub struct DedupInterner<T> {
|
||||||
stable_store: Vec<T>,
|
stable_store: Vec<T>,
|
||||||
lookup: FxHashMap<T, Interned<T>>,
|
lookup: FxHashMap<T, Interned<T>>,
|
||||||
}
|
}
|
||||||
impl<T> Default for Interner<T> {
|
impl<T> Default for DedupInterner<T> {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { stable_store: Default::default(), lookup: Default::default() }
|
Self { stable_store: Default::default(), lookup: Default::default() }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
impl<T> DedupInterner<T> {
|
||||||
|
pub fn freeze(self) -> FixedSizeInterner<T> {
|
||||||
|
FixedSizeInterner { stable_store: self.stable_store }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<T> Interner<T>
|
impl<T> DedupInterner<T>
|
||||||
where
|
where
|
||||||
T: Clone + Eq + Hash,
|
T: Clone + Eq + Hash,
|
||||||
{
|
{
|
||||||
@ -52,8 +60,9 @@ where
|
|||||||
if let Some(interned) = self.lookup.get(&s) {
|
if let Some(interned) = self.lookup.get(&s) {
|
||||||
*interned
|
*interned
|
||||||
} else {
|
} else {
|
||||||
|
assert!(self.stable_store.len() < u16::MAX as usize);
|
||||||
self.stable_store.push(s.clone());
|
self.stable_store.push(s.clone());
|
||||||
let interned = Interned::new(self.stable_store.len() as u32 - 1);
|
let interned = Interned::new(self.stable_store.len() as u16 - 1);
|
||||||
self.lookup.insert(s, interned);
|
self.lookup.insert(s, interned);
|
||||||
interned
|
interned
|
||||||
}
|
}
|
||||||
@ -62,7 +71,93 @@ where
|
|||||||
&self.stable_store[interned.idx as usize]
|
&self.stable_store[interned.idx as usize]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Interner<T> {
|
||||||
|
stable_store: Vec<T>,
|
||||||
|
}
|
||||||
|
impl<T> Default for Interner<T> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self { stable_store: Default::default() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<T> Interner<T> {
|
||||||
|
pub fn freeze(self) -> FixedSizeInterner<T> {
|
||||||
|
FixedSizeInterner { stable_store: self.stable_store }
|
||||||
|
}
|
||||||
|
pub fn push(&mut self, s: T) -> Interned<T> {
|
||||||
|
assert!(self.stable_store.len() < u16::MAX as usize);
|
||||||
|
self.stable_store.push(s);
|
||||||
|
Interned::new(self.stable_store.len() as u16 - 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct FixedSizeInterner<T> {
|
||||||
|
stable_store: Vec<T>,
|
||||||
|
}
|
||||||
|
impl<T: Clone> FixedSizeInterner<T> {
|
||||||
|
pub fn new(length: u16, value: T) -> Self {
|
||||||
|
Self { stable_store: vec![value; length as usize] }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> FixedSizeInterner<T> {
|
||||||
|
pub fn from_vec(store: Vec<T>) -> Self {
|
||||||
|
Self { stable_store: store }
|
||||||
|
}
|
||||||
|
pub fn get(&self, interned: Interned<T>) -> &T {
|
||||||
|
&self.stable_store[interned.idx as usize]
|
||||||
|
}
|
||||||
|
pub fn get_mut(&mut self, interned: Interned<T>) -> &mut T {
|
||||||
|
&mut self.stable_store[interned.idx as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> u16 {
|
||||||
|
self.stable_store.len() as u16
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<U, T> {
|
||||||
|
MappedInterner {
|
||||||
|
stable_store: self.stable_store.iter().map(map_f).collect(),
|
||||||
|
_phantom: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn indexes(&self) -> impl Iterator<Item = Interned<T>> {
|
||||||
|
(0..self.stable_store.len()).map(|i| Interned::new(i as u16))
|
||||||
|
}
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = (Interned<T>, &T)> {
|
||||||
|
self.stable_store.iter().enumerate().map(|(i, x)| (Interned::new(i as u16), x))
|
||||||
|
}
|
||||||
|
pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<T>, &mut T)> {
|
||||||
|
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::new(i as u16), x))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct MappedInterner<T, From> {
|
||||||
|
stable_store: Vec<T>,
|
||||||
|
_phantom: PhantomData<From>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T, From> MappedInterner<T, From> {
|
||||||
|
pub fn get(&self, interned: Interned<From>) -> &T {
|
||||||
|
&self.stable_store[interned.idx as usize]
|
||||||
|
}
|
||||||
|
pub fn get_mut(&mut self, interned: Interned<From>) -> &mut T {
|
||||||
|
&mut self.stable_store[interned.idx as usize]
|
||||||
|
}
|
||||||
|
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<U, From> {
|
||||||
|
MappedInterner {
|
||||||
|
stable_store: self.stable_store.iter().map(map_f).collect(),
|
||||||
|
_phantom: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = (Interned<From>, &T)> {
|
||||||
|
self.stable_store.iter().enumerate().map(|(i, x)| (Interned::new(i as u16), x))
|
||||||
|
}
|
||||||
|
pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<From>, &mut T)> {
|
||||||
|
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::new(i as u16), x))
|
||||||
|
}
|
||||||
|
}
|
||||||
// Interned<T> boilerplate implementations
|
// Interned<T> boilerplate implementations
|
||||||
|
|
||||||
impl<T> Hash for Interned<T> {
|
impl<T> Hash for Interned<T> {
|
||||||
@ -97,3 +192,14 @@ impl<T> Clone for Interned<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T> Copy for Interned<T> {}
|
impl<T> Copy for Interned<T> {}
|
||||||
|
|
||||||
|
impl<T> fmt::Display for Interned<T> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
fmt::Display::fmt(&self.idx, f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<T> fmt::Debug for Interned<T> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
fmt::Debug::fmt(&self.idx, f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -6,10 +6,12 @@ use std::time::Instant;
|
|||||||
use rand::random;
|
use rand::random;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::search::new::interner::{Interned, MappedInterner};
|
||||||
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
||||||
use crate::search::new::ranking_rule_graph::{
|
use crate::search::new::ranking_rule_graph::{
|
||||||
Edge, EdgeCondition, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
|
DeadEndPathCache, Edge, EdgeCondition, ProximityEdge, ProximityGraph, RankingRuleGraph,
|
||||||
TypoGraph,
|
RankingRuleGraphTrait, TypoEdge, TypoGraph,
|
||||||
};
|
};
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||||
@ -42,17 +44,17 @@ pub enum SearchEvents {
|
|||||||
ProximityState {
|
ProximityState {
|
||||||
graph: RankingRuleGraph<ProximityGraph>,
|
graph: RankingRuleGraph<ProximityGraph>,
|
||||||
paths: Vec<Vec<u16>>,
|
paths: Vec<Vec<u16>>,
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: DeadEndPathCache<ProximityGraph>,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
},
|
},
|
||||||
TypoState {
|
TypoState {
|
||||||
graph: RankingRuleGraph<TypoGraph>,
|
graph: RankingRuleGraph<TypoGraph>,
|
||||||
paths: Vec<Vec<u16>>,
|
paths: Vec<Vec<u16>>,
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: DeadEndPathCache<TypoGraph>,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
distances: MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
},
|
},
|
||||||
RankingRuleSkipBucket {
|
RankingRuleSkipBucket {
|
||||||
@ -168,9 +170,9 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
paths_map: &[Vec<u16>],
|
paths_map: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
) {
|
) {
|
||||||
self.events.push(SearchEvents::ProximityState {
|
self.events.push(SearchEvents::ProximityState {
|
||||||
@ -178,7 +180,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
paths: paths_map.to_vec(),
|
paths: paths_map.to_vec(),
|
||||||
empty_paths_cache: empty_paths_cache.clone(),
|
empty_paths_cache: empty_paths_cache.clone(),
|
||||||
universe: universe.clone(),
|
universe: universe.clone(),
|
||||||
distances,
|
distances: distances.clone(),
|
||||||
cost,
|
cost,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -187,9 +189,9 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
paths_map: &[Vec<u16>],
|
paths_map: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &DeadEndPathCache<TypoGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
) {
|
) {
|
||||||
self.events.push(SearchEvents::TypoState {
|
self.events.push(SearchEvents::TypoState {
|
||||||
@ -197,7 +199,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
paths: paths_map.to_vec(),
|
paths: paths_map.to_vec(),
|
||||||
empty_paths_cache: empty_paths_cache.clone(),
|
empty_paths_cache: empty_paths_cache.clone(),
|
||||||
universe: universe.clone(),
|
universe: universe.clone(),
|
||||||
distances,
|
distances: distances.clone(),
|
||||||
cost,
|
cost,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -424,15 +426,15 @@ results.{random} {{
|
|||||||
writeln!(&mut file, "}}").unwrap();
|
writeln!(&mut file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn query_node_d2_desc(
|
fn query_node_d2_desc<R: RankingRuleGraphTrait>(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
node_idx: usize,
|
node_idx: Interned<QueryNode>,
|
||||||
node: &QueryNode,
|
node: &QueryNode,
|
||||||
distances: &[(u16, SmallBitmap)],
|
distances: &[(u16, SmallBitmap<R::EdgeCondition>)],
|
||||||
file: &mut File,
|
file: &mut File,
|
||||||
) {
|
) {
|
||||||
match &node {
|
match &node.data {
|
||||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => {
|
QueryNodeData::Term(LocatedQueryTerm { value, .. }) => {
|
||||||
let QueryTerm {
|
let QueryTerm {
|
||||||
original,
|
original,
|
||||||
zero_typo,
|
zero_typo,
|
||||||
@ -496,11 +498,11 @@ shape: class"
|
|||||||
|
|
||||||
writeln!(file, "}}").unwrap();
|
writeln!(file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
QueryNode::Deleted => panic!(),
|
QueryNodeData::Deleted => panic!(),
|
||||||
QueryNode::Start => {
|
QueryNodeData::Start => {
|
||||||
writeln!(file, "{node_idx} : START").unwrap();
|
writeln!(file, "{node_idx} : START").unwrap();
|
||||||
}
|
}
|
||||||
QueryNode::End => {
|
QueryNodeData::End => {
|
||||||
writeln!(file, "{node_idx} : END").unwrap();
|
writeln!(file, "{node_idx} : END").unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -511,14 +513,14 @@ shape: class"
|
|||||||
file: &mut File,
|
file: &mut File,
|
||||||
) {
|
) {
|
||||||
writeln!(file, "direction: right").unwrap();
|
writeln!(file, "direction: right").unwrap();
|
||||||
for node in 0..query_graph.nodes.len() {
|
for (node_id, node) in query_graph.nodes.iter() {
|
||||||
if matches!(query_graph.nodes[node], QueryNode::Deleted) {
|
if matches!(node.data, QueryNodeData::Deleted) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Self::query_node_d2_desc(ctx, node, &query_graph.nodes[node], &[], file);
|
Self::query_node_d2_desc::<TypoGraph>(ctx, node_id, node, &[], file);
|
||||||
|
|
||||||
for edge in query_graph.edges[node].successors.iter() {
|
for edge in node.successors.iter() {
|
||||||
writeln!(file, "{node} -> {edge};\n").unwrap();
|
writeln!(file, "{node_id} -> {edge};\n").unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -526,30 +528,27 @@ shape: class"
|
|||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
graph: &RankingRuleGraph<R>,
|
graph: &RankingRuleGraph<R>,
|
||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
_empty_paths_cache: &EmptyPathsCache,
|
_empty_paths_cache: &DeadEndPathCache<R>,
|
||||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
distances: MappedInterner<Vec<(u16, SmallBitmap<R::EdgeCondition>)>, QueryNode>,
|
||||||
file: &mut File,
|
file: &mut File,
|
||||||
) {
|
) {
|
||||||
writeln!(file, "direction: right").unwrap();
|
writeln!(file, "direction: right").unwrap();
|
||||||
|
|
||||||
writeln!(file, "Proximity Graph {{").unwrap();
|
writeln!(file, "Proximity Graph {{").unwrap();
|
||||||
for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() {
|
for (node_idx, node) in graph.query_graph.nodes.iter() {
|
||||||
if matches!(node, QueryNode::Deleted) {
|
if matches!(&node.data, QueryNodeData::Deleted) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let distances = &distances[node_idx];
|
let distances = &distances.get(node_idx);
|
||||||
Self::query_node_d2_desc(ctx, node_idx, node, distances.as_slice(), file);
|
Self::query_node_d2_desc::<R>(ctx, node_idx, node, distances, file);
|
||||||
}
|
}
|
||||||
for edge in graph.edges_store.iter().flatten() {
|
for (_edge_id, edge) in graph.edges_store.iter() {
|
||||||
let Edge { source_node, dest_node, condition: details, .. } = edge;
|
let Some(edge) = edge else { continue };
|
||||||
|
let Edge { source_node, dest_node, condition: details, cost } = edge;
|
||||||
|
|
||||||
match &details {
|
match &details {
|
||||||
EdgeCondition::Unconditional => {
|
EdgeCondition::Unconditional => {
|
||||||
writeln!(
|
writeln!(file, "{source_node} -> {dest_node} : \"always cost {cost}\"",)
|
||||||
file,
|
|
||||||
"{source_node} -> {dest_node} : \"always cost {cost}\"",
|
|
||||||
cost = edge.cost,
|
|
||||||
)
|
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
EdgeCondition::Conditional(condition) => {
|
EdgeCondition::Conditional(condition) => {
|
||||||
@ -590,39 +589,19 @@ shape: class"
|
|||||||
// }
|
// }
|
||||||
// writeln!(file, "}}").unwrap();
|
// writeln!(file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
fn edge_d2_description<R: RankingRuleGraphTrait>(
|
fn condition_d2_description<R: RankingRuleGraphTrait>(
|
||||||
ctx: &mut SearchContext,
|
_ctx: &mut SearchContext,
|
||||||
graph: &RankingRuleGraph<R>,
|
graph: &RankingRuleGraph<R>,
|
||||||
edge_idx: u16,
|
condition_id: Interned<R::EdgeCondition>,
|
||||||
file: &mut File,
|
file: &mut File,
|
||||||
) {
|
) {
|
||||||
let Edge { source_node, dest_node, cost, .. } =
|
let condition = graph.conditions_interner.get(condition_id);
|
||||||
graph.edges_store[edge_idx as usize].as_ref().unwrap();
|
|
||||||
let source_node = &graph.query_graph.nodes[*source_node as usize];
|
|
||||||
let source_node_desc = match source_node {
|
|
||||||
QueryNode::Term(term) => {
|
|
||||||
let term = ctx.term_interner.get(term.value);
|
|
||||||
ctx.word_interner.get(term.original).to_owned()
|
|
||||||
}
|
|
||||||
QueryNode::Deleted => panic!(),
|
|
||||||
QueryNode::Start => "START".to_owned(),
|
|
||||||
QueryNode::End => "END".to_owned(),
|
|
||||||
};
|
|
||||||
let dest_node = &graph.query_graph.nodes[*dest_node as usize];
|
|
||||||
let dest_node_desc = match dest_node {
|
|
||||||
QueryNode::Term(term) => {
|
|
||||||
let term = ctx.term_interner.get(term.value);
|
|
||||||
ctx.word_interner.get(term.original).to_owned()
|
|
||||||
}
|
|
||||||
QueryNode::Deleted => panic!(),
|
|
||||||
QueryNode::Start => "START".to_owned(),
|
|
||||||
QueryNode::End => "END".to_owned(),
|
|
||||||
};
|
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"{edge_idx}: \"{source_node_desc}->{dest_node_desc} [{cost}]\" {{
|
"{condition_id}: \"{}\" {{
|
||||||
shape: class
|
shape: class
|
||||||
}}"
|
}}",
|
||||||
|
R::label_for_edge_condition(condition)
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
@ -632,12 +611,12 @@ shape: class"
|
|||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
file: &mut File,
|
file: &mut File,
|
||||||
) {
|
) {
|
||||||
for (path_idx, edge_indexes) in paths.iter().enumerate() {
|
for (path_idx, condition_indexes) in paths.iter().enumerate() {
|
||||||
writeln!(file, "{path_idx} {{").unwrap();
|
writeln!(file, "{path_idx} {{").unwrap();
|
||||||
for edge_idx in edge_indexes.iter() {
|
for condition in condition_indexes.iter() {
|
||||||
Self::edge_d2_description(ctx, graph, *edge_idx, file);
|
Self::condition_d2_description(ctx, graph, Interned::new(*condition), file);
|
||||||
}
|
}
|
||||||
for couple_edges in edge_indexes.windows(2) {
|
for couple_edges in condition_indexes.windows(2) {
|
||||||
let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() };
|
let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() };
|
||||||
writeln!(file, "{src_edge_idx} -> {dest_edge_idx}").unwrap();
|
writeln!(file, "{src_edge_idx} -> {dest_edge_idx}").unwrap();
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,11 @@ pub mod detailed;
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph};
|
use super::interner::MappedInterner;
|
||||||
|
use super::query_graph::QueryNode;
|
||||||
|
use super::ranking_rule_graph::{
|
||||||
|
DeadEndPathCache, ProximityEdge, ProximityGraph, RankingRuleGraph, TypoEdge, TypoGraph,
|
||||||
|
};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{RankingRule, RankingRuleQueryTrait};
|
use super::{RankingRule, RankingRuleQueryTrait};
|
||||||
|
|
||||||
@ -62,9 +66,9 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -73,9 +77,9 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &DeadEndPathCache<TypoGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -133,9 +137,9 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
_query_graph: &RankingRuleGraph<ProximityGraph>,
|
_query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
_paths_map: &[Vec<u16>],
|
_paths_map: &[Vec<u16>],
|
||||||
_empty_paths_cache: &EmptyPathsCache,
|
_empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: Vec<Vec<(u16, SmallBitmap)>>,
|
_distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||||
_cost: u16,
|
_cost: u16,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
@ -144,9 +148,9 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
_query_graph: &RankingRuleGraph<TypoGraph>,
|
_query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
_paths: &[Vec<u16>],
|
_paths: &[Vec<u16>],
|
||||||
_empty_paths_cache: &EmptyPathsCache,
|
_empty_paths_cache: &DeadEndPathCache<TypoGraph>,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: Vec<Vec<(u16, SmallBitmap)>>,
|
_distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
_cost: u16,
|
_cost: u16,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,8 @@ use query_graph::{QueryGraph, QueryNode};
|
|||||||
pub use ranking_rules::{bucket_sort, RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
|
pub use ranking_rules::{bucket_sort, RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use self::interner::Interner;
|
use self::interner::DedupInterner;
|
||||||
|
use self::query_graph::QueryNodeData;
|
||||||
use self::query_term::{Phrase, QueryTerm};
|
use self::query_term::{Phrase, QueryTerm};
|
||||||
use self::ranking_rules::PlaceholderQuery;
|
use self::ranking_rules::PlaceholderQuery;
|
||||||
use self::resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache};
|
use self::resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache};
|
||||||
@ -39,9 +40,9 @@ pub struct SearchContext<'ctx> {
|
|||||||
pub index: &'ctx Index,
|
pub index: &'ctx Index,
|
||||||
pub txn: &'ctx RoTxn<'ctx>,
|
pub txn: &'ctx RoTxn<'ctx>,
|
||||||
pub db_cache: DatabaseCache<'ctx>,
|
pub db_cache: DatabaseCache<'ctx>,
|
||||||
pub word_interner: Interner<String>,
|
pub word_interner: DedupInterner<String>,
|
||||||
pub phrase_interner: Interner<Phrase>,
|
pub phrase_interner: DedupInterner<Phrase>,
|
||||||
pub term_interner: Interner<QueryTerm>,
|
pub term_interner: DedupInterner<QueryTerm>,
|
||||||
pub term_docids: QueryTermDocIdsCache,
|
pub term_docids: QueryTermDocIdsCache,
|
||||||
}
|
}
|
||||||
impl<'ctx> SearchContext<'ctx> {
|
impl<'ctx> SearchContext<'ctx> {
|
||||||
@ -70,12 +71,12 @@ fn resolve_maximally_reduced_query_graph<'ctx>(
|
|||||||
let mut positions_to_remove = match matching_strategy {
|
let mut positions_to_remove = match matching_strategy {
|
||||||
TermsMatchingStrategy::Last => {
|
TermsMatchingStrategy::Last => {
|
||||||
let mut all_positions = BTreeSet::new();
|
let mut all_positions = BTreeSet::new();
|
||||||
for n in query_graph.nodes.iter() {
|
for (_, n) in query_graph.nodes.iter() {
|
||||||
match n {
|
match &n.data {
|
||||||
QueryNode::Term(term) => {
|
QueryNodeData::Term(term) => {
|
||||||
all_positions.extend(term.positions.clone().into_iter());
|
all_positions.extend(term.positions.clone().into_iter());
|
||||||
}
|
}
|
||||||
QueryNode::Deleted | QueryNode::Start | QueryNode::End => {}
|
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
all_positions.into_iter().collect()
|
all_positions.into_iter().collect()
|
||||||
@ -200,7 +201,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
asc.insert(field);
|
asc.insert(field);
|
||||||
todo!();
|
// TODO
|
||||||
}
|
}
|
||||||
crate::Criterion::Desc(field) => {
|
crate::Criterion::Desc(field) => {
|
||||||
if desc.contains(&field) {
|
if desc.contains(&field) {
|
||||||
@ -295,24 +296,27 @@ mod tests {
|
|||||||
|
|
||||||
println!("nbr docids: {}", index.documents_ids(&txn).unwrap().len());
|
println!("nbr docids: {}", index.documents_ids(&txn).unwrap().len());
|
||||||
|
|
||||||
// loop {
|
loop {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
|
// let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||||
let mut ctx = SearchContext::new(&index, &txn);
|
let mut ctx = SearchContext::new(&index, &txn);
|
||||||
let results = execute_search(
|
let results = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
"sun flower s are the best",
|
// "which a the releases from poison by the government",
|
||||||
|
// "sun flower s are the best",
|
||||||
|
"zero config",
|
||||||
TermsMatchingStrategy::Last,
|
TermsMatchingStrategy::Last,
|
||||||
None,
|
None,
|
||||||
0,
|
0,
|
||||||
20,
|
20,
|
||||||
&mut DefaultSearchLogger,
|
&mut DefaultSearchLogger,
|
||||||
&mut logger,
|
&mut DefaultSearchLogger,
|
||||||
|
//&mut logger,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
logger.write_d2_description(&mut ctx);
|
// logger.write_d2_description(&mut ctx);
|
||||||
|
|
||||||
let elapsed = start.elapsed();
|
let elapsed = start.elapsed();
|
||||||
println!("{}us", elapsed.as_micros());
|
println!("{}us", elapsed.as_micros());
|
||||||
@ -333,7 +337,7 @@ mod tests {
|
|||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
println!("{}us: {:?}", elapsed.as_micros(), results);
|
println!("{}us: {:?}", elapsed.as_micros(), results);
|
||||||
// }
|
}
|
||||||
// for (id, _document) in documents {
|
// for (id, _document) in documents {
|
||||||
// println!("{id}:");
|
// println!("{id}:");
|
||||||
// // println!("{document}");
|
// // println!("{document}");
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
use super::interner::{FixedSizeInterner, Interned};
|
||||||
use super::query_term::{self, number_of_typos_allowed, LocatedQueryTerm};
|
use super::query_term::{self, number_of_typos_allowed, LocatedQueryTerm};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::SearchContext;
|
use super::SearchContext;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub const QUERY_GRAPH_NODE_LENGTH_LIMIT: u16 = 64;
|
|
||||||
|
|
||||||
/// A node of the [`QueryGraph`].
|
/// A node of the [`QueryGraph`].
|
||||||
///
|
///
|
||||||
/// There are four types of nodes:
|
/// There are four types of nodes:
|
||||||
@ -15,22 +16,19 @@ pub const QUERY_GRAPH_NODE_LENGTH_LIMIT: u16 = 64;
|
|||||||
/// 4. `Term` is a regular node representing a word or combination of words
|
/// 4. `Term` is a regular node representing a word or combination of words
|
||||||
/// from the user query.
|
/// from the user query.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub enum QueryNode {
|
pub struct QueryNode {
|
||||||
|
pub data: QueryNodeData,
|
||||||
|
pub predecessors: SmallBitmap<QueryNode>,
|
||||||
|
pub successors: SmallBitmap<QueryNode>,
|
||||||
|
}
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum QueryNodeData {
|
||||||
Term(LocatedQueryTerm),
|
Term(LocatedQueryTerm),
|
||||||
Deleted,
|
Deleted,
|
||||||
Start,
|
Start,
|
||||||
End,
|
End,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The edges associated with a node in the query graph.
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Edges {
|
|
||||||
/// Set of nodes which have an edge going to the current node
|
|
||||||
pub predecessors: SmallBitmap,
|
|
||||||
/// Set of nodes which are reached by an edge from the current node
|
|
||||||
pub successors: SmallBitmap,
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
A graph representing all the ways to interpret the user's search query.
|
A graph representing all the ways to interpret the user's search query.
|
||||||
|
|
||||||
@ -78,55 +76,45 @@ and the transformations that were done on the query graph).
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct QueryGraph {
|
pub struct QueryGraph {
|
||||||
/// The index of the start node within `self.nodes`
|
/// The index of the start node within `self.nodes`
|
||||||
pub root_node: u16,
|
pub root_node: Interned<QueryNode>,
|
||||||
/// The index of the end node within `self.nodes`
|
/// The index of the end node within `self.nodes`
|
||||||
pub end_node: u16,
|
pub end_node: Interned<QueryNode>,
|
||||||
/// The list of all query nodes
|
/// The list of all query nodes
|
||||||
pub nodes: Vec<QueryNode>,
|
pub nodes: FixedSizeInterner<QueryNode>,
|
||||||
/// The list of all node edges
|
|
||||||
pub edges: Vec<Edges>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for QueryGraph {
|
// impl Default for QueryGraph {
|
||||||
/// Create a new QueryGraph with two disconnected nodes: the root and end nodes.
|
// /// Create a new QueryGraph with two disconnected nodes: the root and end nodes.
|
||||||
fn default() -> Self {
|
// fn default() -> Self {
|
||||||
let nodes = vec![QueryNode::Start, QueryNode::End];
|
// let nodes = vec![
|
||||||
let edges = vec![
|
// QueryNode {
|
||||||
Edges {
|
// data: QueryNodeData::Start,
|
||||||
predecessors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
// predecessors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
||||||
successors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
// successors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
||||||
},
|
// },
|
||||||
Edges {
|
// QueryNode {
|
||||||
predecessors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
// data: QueryNodeData::End,
|
||||||
successors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
// predecessors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
||||||
},
|
// successors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
||||||
];
|
// },
|
||||||
|
// ];
|
||||||
|
|
||||||
Self { root_node: 0, end_node: 1, nodes, edges }
|
// Self { root_node: 0, end_node: 1, nodes }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
impl QueryGraph {
|
impl QueryGraph {
|
||||||
/// Connect all the given predecessor nodes to the given successor node
|
/// Connect all the given predecessor nodes to the given successor node
|
||||||
fn connect_to_node(&mut self, from_nodes: &[u16], to_node: u16) {
|
fn connect_to_node(
|
||||||
|
&mut self,
|
||||||
|
from_nodes: &[Interned<QueryNode>],
|
||||||
|
to_node: Interned<QueryNode>,
|
||||||
|
) {
|
||||||
for &from_node in from_nodes {
|
for &from_node in from_nodes {
|
||||||
self.edges[from_node as usize].successors.insert(to_node);
|
self.nodes.get_mut(from_node).successors.insert(to_node);
|
||||||
self.edges[to_node as usize].predecessors.insert(from_node);
|
self.nodes.get_mut(to_node).predecessors.insert(from_node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Add the given node to the graph and connect it to all the given predecessor nodes
|
|
||||||
fn add_node(&mut self, from_nodes: &[u16], node: QueryNode) -> u16 {
|
|
||||||
let new_node_idx = self.nodes.len() as u16;
|
|
||||||
assert!(new_node_idx <= QUERY_GRAPH_NODE_LENGTH_LIMIT);
|
|
||||||
self.nodes.push(node);
|
|
||||||
self.edges.push(Edges {
|
|
||||||
predecessors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
|
||||||
successors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
|
||||||
});
|
|
||||||
self.connect_to_node(from_nodes, new_node_idx);
|
|
||||||
|
|
||||||
new_node_idx
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl QueryGraph {
|
impl QueryGraph {
|
||||||
@ -136,17 +124,27 @@ impl QueryGraph {
|
|||||||
|
|
||||||
let mut empty_nodes = vec![];
|
let mut empty_nodes = vec![];
|
||||||
|
|
||||||
let mut graph = QueryGraph::default();
|
let mut predecessors: Vec<HashSet<u16>> = vec![HashSet::new(), HashSet::new()];
|
||||||
|
let mut successors: Vec<HashSet<u16>> = vec![HashSet::new(), HashSet::new()];
|
||||||
|
let mut nodes_data: Vec<QueryNodeData> = vec![QueryNodeData::Start, QueryNodeData::End];
|
||||||
|
let root_node = 0;
|
||||||
|
let end_node = 1;
|
||||||
|
|
||||||
// TODO: we could consider generalizing to 4,5,6,7,etc. ngrams
|
// TODO: we could consider generalizing to 4,5,6,7,etc. ngrams
|
||||||
let (mut prev2, mut prev1, mut prev0): (Vec<u16>, Vec<u16>, Vec<u16>) =
|
let (mut prev2, mut prev1, mut prev0): (Vec<u16>, Vec<u16>, Vec<u16>) =
|
||||||
(vec![], vec![], vec![graph.root_node]);
|
(vec![], vec![], vec![root_node]);
|
||||||
|
|
||||||
for term_idx in 0..terms.len() {
|
for term_idx in 0..terms.len() {
|
||||||
let term0 = &terms[term_idx];
|
let term0 = &terms[term_idx];
|
||||||
|
|
||||||
let mut new_nodes = vec![];
|
let mut new_nodes = vec![];
|
||||||
let new_node_idx = graph.add_node(&prev0, QueryNode::Term(term0.clone()));
|
let new_node_idx = add_node(
|
||||||
|
&mut nodes_data,
|
||||||
|
QueryNodeData::Term(term0.clone()),
|
||||||
|
&prev0,
|
||||||
|
&mut successors,
|
||||||
|
&mut predecessors,
|
||||||
|
);
|
||||||
new_nodes.push(new_node_idx);
|
new_nodes.push(new_node_idx);
|
||||||
if term0.is_empty(&ctx.term_interner) {
|
if term0.is_empty(&ctx.term_interner) {
|
||||||
empty_nodes.push(new_node_idx);
|
empty_nodes.push(new_node_idx);
|
||||||
@ -156,7 +154,13 @@ impl QueryGraph {
|
|||||||
if let Some(ngram) =
|
if let Some(ngram) =
|
||||||
query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)?
|
query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)?
|
||||||
{
|
{
|
||||||
let ngram_idx = graph.add_node(&prev1, QueryNode::Term(ngram));
|
let ngram_idx = add_node(
|
||||||
|
&mut nodes_data,
|
||||||
|
QueryNodeData::Term(ngram),
|
||||||
|
&prev1,
|
||||||
|
&mut successors,
|
||||||
|
&mut predecessors,
|
||||||
|
);
|
||||||
new_nodes.push(ngram_idx);
|
new_nodes.push(ngram_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -164,53 +168,96 @@ impl QueryGraph {
|
|||||||
if let Some(ngram) =
|
if let Some(ngram) =
|
||||||
query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)?
|
query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)?
|
||||||
{
|
{
|
||||||
let ngram_idx = graph.add_node(&prev2, QueryNode::Term(ngram));
|
let ngram_idx = add_node(
|
||||||
|
&mut nodes_data,
|
||||||
|
QueryNodeData::Term(ngram),
|
||||||
|
&prev2,
|
||||||
|
&mut successors,
|
||||||
|
&mut predecessors,
|
||||||
|
);
|
||||||
new_nodes.push(ngram_idx);
|
new_nodes.push(ngram_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(prev0, prev1, prev2) = (new_nodes, prev0, prev1);
|
(prev0, prev1, prev2) = (new_nodes, prev0, prev1);
|
||||||
}
|
}
|
||||||
graph.connect_to_node(&prev0, graph.end_node);
|
|
||||||
|
|
||||||
|
let root_node = Interned::new(root_node);
|
||||||
|
let end_node = Interned::new(end_node);
|
||||||
|
let mut nodes = FixedSizeInterner::new(
|
||||||
|
nodes_data.len() as u16,
|
||||||
|
QueryNode {
|
||||||
|
data: QueryNodeData::Deleted,
|
||||||
|
predecessors: SmallBitmap::new(nodes_data.len() as u16),
|
||||||
|
successors: SmallBitmap::new(nodes_data.len() as u16),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
for (node_idx, ((node_data, predecessors), successors)) in nodes_data
|
||||||
|
.into_iter()
|
||||||
|
.zip(predecessors.into_iter())
|
||||||
|
.zip(successors.into_iter())
|
||||||
|
.enumerate()
|
||||||
|
{
|
||||||
|
let node = nodes.get_mut(Interned::new(node_idx as u16));
|
||||||
|
node.data = node_data;
|
||||||
|
for x in predecessors {
|
||||||
|
node.predecessors.insert(Interned::new(x));
|
||||||
|
}
|
||||||
|
for x in successors {
|
||||||
|
node.successors.insert(Interned::new(x));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut graph = QueryGraph { root_node, end_node, nodes };
|
||||||
|
|
||||||
|
graph.connect_to_node(
|
||||||
|
prev0.into_iter().map(Interned::new).collect::<Vec<_>>().as_slice(),
|
||||||
|
end_node,
|
||||||
|
);
|
||||||
|
let empty_nodes = empty_nodes.into_iter().map(Interned::new).collect::<Vec<_>>();
|
||||||
graph.remove_nodes_keep_edges(&empty_nodes);
|
graph.remove_nodes_keep_edges(&empty_nodes);
|
||||||
|
|
||||||
Ok(graph)
|
Ok(graph)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remove the given nodes and all their edges from the query graph.
|
/// Remove the given nodes and all their edges from the query graph.
|
||||||
pub fn remove_nodes(&mut self, nodes: &[u16]) {
|
pub fn remove_nodes(&mut self, nodes: &[Interned<QueryNode>]) {
|
||||||
for &node in nodes {
|
for &node_id in nodes {
|
||||||
self.nodes[node as usize] = QueryNode::Deleted;
|
let node = &self.nodes.get(node_id);
|
||||||
let edges = self.edges[node as usize].clone();
|
let old_node_pred = node.predecessors.clone();
|
||||||
for pred in edges.predecessors.iter() {
|
let old_node_succ = node.successors.clone();
|
||||||
self.edges[pred as usize].successors.remove(node);
|
|
||||||
|
for pred in old_node_pred.iter() {
|
||||||
|
self.nodes.get_mut(pred).successors.remove(node_id);
|
||||||
}
|
}
|
||||||
for succ in edges.successors.iter() {
|
for succ in old_node_succ.iter() {
|
||||||
self.edges[succ as usize].predecessors.remove(node);
|
self.nodes.get_mut(succ).predecessors.remove(node_id);
|
||||||
}
|
}
|
||||||
self.edges[node as usize] = Edges {
|
|
||||||
predecessors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
let node = self.nodes.get_mut(node_id);
|
||||||
successors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
node.data = QueryNodeData::Deleted;
|
||||||
};
|
node.predecessors.clear();
|
||||||
|
node.successors.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Remove the given nodes, connecting all their predecessors to all their successors.
|
/// Remove the given nodes, connecting all their predecessors to all their successors.
|
||||||
pub fn remove_nodes_keep_edges(&mut self, nodes: &[u16]) {
|
pub fn remove_nodes_keep_edges(&mut self, nodes: &[Interned<QueryNode>]) {
|
||||||
for &node in nodes {
|
for &node_id in nodes {
|
||||||
self.nodes[node as usize] = QueryNode::Deleted;
|
let node = self.nodes.get(node_id);
|
||||||
let edges = self.edges[node as usize].clone();
|
let old_node_pred = node.predecessors.clone();
|
||||||
for pred in edges.predecessors.iter() {
|
let old_node_succ = node.successors.clone();
|
||||||
self.edges[pred as usize].successors.remove(node);
|
for pred in old_node_pred.iter() {
|
||||||
self.edges[pred as usize].successors.union(&edges.successors);
|
let pred_successors = &mut self.nodes.get_mut(pred).successors;
|
||||||
|
pred_successors.remove(node_id);
|
||||||
|
pred_successors.union(&old_node_succ);
|
||||||
}
|
}
|
||||||
for succ in edges.successors.iter() {
|
for succ in old_node_succ.iter() {
|
||||||
self.edges[succ as usize].predecessors.remove(node);
|
let succ_predecessors = &mut self.nodes.get_mut(succ).predecessors;
|
||||||
self.edges[succ as usize].predecessors.union(&edges.predecessors);
|
succ_predecessors.remove(node_id);
|
||||||
|
succ_predecessors.union(&old_node_pred);
|
||||||
}
|
}
|
||||||
self.edges[node as usize] = Edges {
|
let node = self.nodes.get_mut(node_id);
|
||||||
predecessors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
node.data = QueryNodeData::Deleted;
|
||||||
successors: SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT),
|
node.predecessors.clear();
|
||||||
};
|
node.successors.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -219,9 +266,8 @@ impl QueryGraph {
|
|||||||
/// Return `true` if any node was removed.
|
/// Return `true` if any node was removed.
|
||||||
pub fn remove_words_starting_at_position(&mut self, position: i8) -> bool {
|
pub fn remove_words_starting_at_position(&mut self, position: i8) -> bool {
|
||||||
let mut nodes_to_remove_keeping_edges = vec![];
|
let mut nodes_to_remove_keeping_edges = vec![];
|
||||||
for (node_idx, node) in self.nodes.iter().enumerate() {
|
for (node_idx, node) in self.nodes.iter() {
|
||||||
let node_idx = node_idx as u16;
|
let QueryNodeData::Term(LocatedQueryTerm { value: _, positions }) = &node.data else { continue };
|
||||||
let QueryNode::Term(LocatedQueryTerm { value: _, positions }) = node else { continue };
|
|
||||||
if positions.start() == &position {
|
if positions.start() == &position {
|
||||||
nodes_to_remove_keeping_edges.push(node_idx);
|
nodes_to_remove_keeping_edges.push(node_idx);
|
||||||
}
|
}
|
||||||
@ -238,13 +284,13 @@ impl QueryGraph {
|
|||||||
fn simplify(&mut self) {
|
fn simplify(&mut self) {
|
||||||
loop {
|
loop {
|
||||||
let mut nodes_to_remove = vec![];
|
let mut nodes_to_remove = vec![];
|
||||||
for (node_idx, node) in self.nodes.iter().enumerate() {
|
for (node_idx, node) in self.nodes.iter() {
|
||||||
if (!matches!(node, QueryNode::End | QueryNode::Deleted)
|
if (!matches!(node.data, QueryNodeData::End | QueryNodeData::Deleted)
|
||||||
&& self.edges[node_idx].successors.is_empty())
|
&& node.successors.is_empty())
|
||||||
|| (!matches!(node, QueryNode::Start | QueryNode::Deleted)
|
|| (!matches!(node.data, QueryNodeData::Start | QueryNodeData::Deleted)
|
||||||
&& self.edges[node_idx].predecessors.is_empty())
|
&& node.predecessors.is_empty())
|
||||||
{
|
{
|
||||||
nodes_to_remove.push(node_idx as u16);
|
nodes_to_remove.push(node_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if nodes_to_remove.is_empty() {
|
if nodes_to_remove.is_empty() {
|
||||||
@ -255,3 +301,21 @@ impl QueryGraph {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn add_node(
|
||||||
|
nodes_data: &mut Vec<QueryNodeData>,
|
||||||
|
node_data: QueryNodeData,
|
||||||
|
from_nodes: &Vec<u16>,
|
||||||
|
successors: &mut Vec<HashSet<u16>>,
|
||||||
|
predecessors: &mut Vec<HashSet<u16>>,
|
||||||
|
) -> u16 {
|
||||||
|
successors.push(HashSet::new());
|
||||||
|
predecessors.push(HashSet::new());
|
||||||
|
let new_node_idx = nodes_data.len() as u16;
|
||||||
|
nodes_data.push(node_data);
|
||||||
|
for &from_node in from_nodes {
|
||||||
|
successors[from_node as usize].insert(new_node_idx);
|
||||||
|
predecessors[new_node_idx as usize].insert(from_node);
|
||||||
|
}
|
||||||
|
new_node_idx
|
||||||
|
}
|
||||||
|
@ -9,7 +9,7 @@ use heed::types::DecodeIgnore;
|
|||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
|
||||||
use super::interner::{Interned, Interner};
|
use super::interner::{DedupInterner, Interned};
|
||||||
use super::SearchContext;
|
use super::SearchContext;
|
||||||
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||||
use crate::search::{build_dfa, get_first};
|
use crate::search::{build_dfa, get_first};
|
||||||
@ -22,7 +22,7 @@ pub struct Phrase {
|
|||||||
pub words: Vec<Option<Interned<String>>>,
|
pub words: Vec<Option<Interned<String>>>,
|
||||||
}
|
}
|
||||||
impl Phrase {
|
impl Phrase {
|
||||||
pub fn description(&self, interner: &Interner<String>) -> String {
|
pub fn description(&self, interner: &DedupInterner<String>) -> String {
|
||||||
self.words.iter().flatten().map(|w| interner.get(*w)).join(" ")
|
self.words.iter().flatten().map(|w| interner.get(*w)).join(" ")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -60,8 +60,8 @@ pub struct QueryTerm {
|
|||||||
}
|
}
|
||||||
impl QueryTerm {
|
impl QueryTerm {
|
||||||
pub fn phrase(
|
pub fn phrase(
|
||||||
word_interner: &mut Interner<String>,
|
word_interner: &mut DedupInterner<String>,
|
||||||
phrase_interner: &mut Interner<Phrase>,
|
phrase_interner: &mut DedupInterner<Phrase>,
|
||||||
phrase: Phrase,
|
phrase: Phrase,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
@ -78,7 +78,7 @@ impl QueryTerm {
|
|||||||
is_ngram: false,
|
is_ngram: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn empty(word_interner: &mut Interner<String>, original: &str) -> Self {
|
pub fn empty(word_interner: &mut DedupInterner<String>, original: &str) -> Self {
|
||||||
Self {
|
Self {
|
||||||
original: word_interner.insert(original.to_owned()),
|
original: word_interner.insert(original.to_owned()),
|
||||||
phrase: None,
|
phrase: None,
|
||||||
@ -313,7 +313,7 @@ pub struct LocatedQueryTerm {
|
|||||||
|
|
||||||
impl LocatedQueryTerm {
|
impl LocatedQueryTerm {
|
||||||
/// Return `true` iff the term is empty
|
/// Return `true` iff the term is empty
|
||||||
pub fn is_empty(&self, interner: &Interner<QueryTerm>) -> bool {
|
pub fn is_empty(&self, interner: &DedupInterner<QueryTerm>) -> bool {
|
||||||
interner.get(self.value).is_empty()
|
interner.get(self.value).is_empty()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::Interner;
|
use crate::search::new::interner::{DedupInterner, Interner};
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::search::new::{QueryGraph, SearchContext};
|
use crate::search::new::{QueryGraph, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -15,40 +15,43 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
|
|
||||||
/// Build the ranking rule graph from the given query graph
|
/// Build the ranking rule graph from the given query graph
|
||||||
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
||||||
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
|
let QueryGraph { nodes: graph_nodes, .. } = &query_graph;
|
||||||
|
|
||||||
let mut conditions_interner = Interner::default();
|
let mut conditions_interner = DedupInterner::default();
|
||||||
|
|
||||||
let mut edges_store = vec![];
|
let mut edges_store = Interner::default();
|
||||||
let mut edges_of_node = vec![];
|
let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new());
|
||||||
|
|
||||||
for (source_idx, source_node) in graph_nodes.iter().enumerate() {
|
for (source_id, source_node) in graph_nodes.iter() {
|
||||||
edges_of_node.push(HashSet::new());
|
let new_edges = edges_of_node.get_mut(source_id);
|
||||||
let new_edges = edges_of_node.last_mut().unwrap();
|
|
||||||
|
|
||||||
for dest_idx in graph_edges[source_idx].successors.iter() {
|
for dest_idx in source_node.successors.iter() {
|
||||||
let dest_node = &graph_nodes[dest_idx as usize];
|
let dest_node = graph_nodes.get(dest_idx);
|
||||||
let edges = G::build_edges(ctx, &mut conditions_interner, source_node, dest_node)?;
|
let edges = G::build_edges(ctx, &mut conditions_interner, source_node, dest_node)?;
|
||||||
if edges.is_empty() {
|
if edges.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (cost, condition) in edges {
|
for (cost, condition) in edges {
|
||||||
edges_store.push(Some(Edge {
|
let new_edge_id = edges_store.push(Some(Edge {
|
||||||
source_node: source_idx as u16,
|
source_node: source_id,
|
||||||
dest_node: dest_idx,
|
dest_node: dest_idx,
|
||||||
cost,
|
cost,
|
||||||
condition,
|
condition,
|
||||||
}));
|
}));
|
||||||
new_edges.insert(edges_store.len() as u16 - 1);
|
new_edges.insert(new_edge_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let edges_of_node = edges_of_node
|
let edges_store = edges_store.freeze();
|
||||||
.into_iter()
|
let edges_of_node =
|
||||||
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), edges_store.len() as u16))
|
edges_of_node.map(|edges| SmallBitmap::from_iter(edges.iter().copied(), &edges_store));
|
||||||
.collect();
|
|
||||||
|
|
||||||
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node, conditions_interner })
|
Ok(RankingRuleGraph {
|
||||||
|
query_graph,
|
||||||
|
edges_store,
|
||||||
|
edges_of_node,
|
||||||
|
conditions_interner: conditions_interner.freeze(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,8 +3,10 @@
|
|||||||
use std::collections::btree_map::Entry;
|
use std::collections::btree_map::Entry;
|
||||||
use std::collections::{BTreeMap, VecDeque};
|
use std::collections::{BTreeMap, VecDeque};
|
||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::DeadEndPathCache;
|
||||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
|
use crate::search::new::interner::{Interned, MappedInterner};
|
||||||
|
use crate::search::new::query_graph::QueryNode;
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
@ -17,11 +19,11 @@ pub struct Path {
|
|||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
pub fn visit_paths_of_cost(
|
pub fn visit_paths_of_cost(
|
||||||
&mut self,
|
&mut self,
|
||||||
from: usize,
|
from: Interned<QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
all_distances: &[Vec<(u16, SmallBitmap)>],
|
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||||
empty_paths_cache: &mut EmptyPathsCache,
|
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||||
mut visit: impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
mut visit: impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<()>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let _ = self.visit_paths_of_cost_rec(
|
let _ = self.visit_paths_of_cost_rec(
|
||||||
from,
|
from,
|
||||||
@ -30,76 +32,108 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
&mut visit,
|
&mut visit,
|
||||||
&mut vec![],
|
&mut vec![],
|
||||||
&mut SmallBitmap::new(self.edges_store.len() as u16),
|
&mut SmallBitmap::new(self.edges_store.len()),
|
||||||
empty_paths_cache.empty_edges.clone(),
|
&mut empty_paths_cache.conditions.clone(),
|
||||||
)?;
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
pub fn visit_paths_of_cost_rec(
|
pub fn visit_paths_of_cost_rec(
|
||||||
&mut self,
|
&mut self,
|
||||||
from: usize,
|
from: Interned<QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
all_distances: &[Vec<(u16, SmallBitmap)>],
|
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||||
empty_paths_cache: &mut EmptyPathsCache,
|
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||||
visit: &mut impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
visit: &mut impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<()>,
|
||||||
prev_edges: &mut Vec<u16>,
|
prev_conditions: &mut Vec<u16>,
|
||||||
cur_path: &mut SmallBitmap,
|
cur_path: &mut SmallBitmap<G::EdgeCondition>,
|
||||||
mut forbidden_edges: SmallBitmap,
|
forbidden_conditions: &mut SmallBitmap<G::EdgeCondition>,
|
||||||
) -> Result<bool> {
|
) -> Result<bool> {
|
||||||
let mut any_valid = false;
|
let mut any_valid = false;
|
||||||
|
|
||||||
let edges = self.edges_of_node[from].clone();
|
let edges = self.edges_of_node.get(from).clone();
|
||||||
for edge_idx in edges.iter() {
|
for edge_idx in edges.iter() {
|
||||||
let Some(edge) = self.edges_store[edge_idx as usize].as_ref() else { continue };
|
let Some(edge) = self.edges_store.get(edge_idx).as_ref() else { continue };
|
||||||
if cost < edge.cost as u16
|
if cost < edge.cost as u16 {
|
||||||
|| forbidden_edges.contains(edge_idx)
|
continue;
|
||||||
|| !all_distances[edge.dest_node as usize].iter().any(
|
}
|
||||||
|(next_cost, necessary_edges)| {
|
let next_any_valid = match edge.condition {
|
||||||
|
EdgeCondition::Unconditional => {
|
||||||
|
if edge.dest_node == self.query_graph.end_node {
|
||||||
|
any_valid = true;
|
||||||
|
visit(prev_conditions, self, empty_paths_cache)?;
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
self.visit_paths_of_cost_rec(
|
||||||
|
edge.dest_node,
|
||||||
|
cost - edge.cost as u16,
|
||||||
|
all_distances,
|
||||||
|
empty_paths_cache,
|
||||||
|
visit,
|
||||||
|
prev_conditions,
|
||||||
|
cur_path,
|
||||||
|
forbidden_conditions,
|
||||||
|
)?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EdgeCondition::Conditional(condition) => {
|
||||||
|
if forbidden_conditions.contains(condition)
|
||||||
|
|| !all_distances.get(edge.dest_node).iter().any(
|
||||||
|
|(next_cost, necessary_conditions)| {
|
||||||
(*next_cost == cost - edge.cost as u16)
|
(*next_cost == cost - edge.cost as u16)
|
||||||
&& !forbidden_edges.intersects(necessary_edges)
|
&& !forbidden_conditions.intersects(necessary_conditions)
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
cur_path.insert(edge_idx);
|
cur_path.insert(condition);
|
||||||
prev_edges.push(edge_idx);
|
// TODO: typed path set
|
||||||
|
prev_conditions.push(condition.into_inner());
|
||||||
let mut new_forbidden_edges = forbidden_edges.clone();
|
|
||||||
new_forbidden_edges.union(&empty_paths_cache.empty_couple_edges[edge_idx as usize]);
|
|
||||||
empty_paths_cache.empty_prefixes.final_edges_after_prefix(prev_edges, &mut |x| {
|
|
||||||
new_forbidden_edges.insert(x);
|
|
||||||
});
|
|
||||||
|
|
||||||
|
let mut new_forbidden_conditions = forbidden_conditions.clone();
|
||||||
|
new_forbidden_conditions
|
||||||
|
.union(empty_paths_cache.condition_couples.get(condition));
|
||||||
|
empty_paths_cache.prefixes.final_edges_after_prefix(
|
||||||
|
prev_conditions,
|
||||||
|
&mut |x| {
|
||||||
|
new_forbidden_conditions.insert(Interned::new(x));
|
||||||
|
},
|
||||||
|
);
|
||||||
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
||||||
any_valid = true;
|
any_valid = true;
|
||||||
visit(prev_edges, self, empty_paths_cache)?;
|
visit(prev_conditions, self, empty_paths_cache)?;
|
||||||
true
|
true
|
||||||
} else {
|
} else {
|
||||||
self.visit_paths_of_cost_rec(
|
self.visit_paths_of_cost_rec(
|
||||||
edge.dest_node as usize,
|
edge.dest_node,
|
||||||
cost - edge.cost as u16,
|
cost - edge.cost as u16,
|
||||||
all_distances,
|
all_distances,
|
||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
visit,
|
visit,
|
||||||
prev_edges,
|
prev_conditions,
|
||||||
cur_path,
|
cur_path,
|
||||||
new_forbidden_edges,
|
&mut new_forbidden_conditions,
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
cur_path.remove(condition);
|
||||||
|
prev_conditions.pop();
|
||||||
|
next_any_valid
|
||||||
|
}
|
||||||
|
};
|
||||||
any_valid |= next_any_valid;
|
any_valid |= next_any_valid;
|
||||||
cur_path.remove(edge_idx);
|
|
||||||
prev_edges.pop();
|
|
||||||
if next_any_valid {
|
if next_any_valid {
|
||||||
if empty_paths_cache.path_is_empty(prev_edges, cur_path) {
|
if empty_paths_cache.path_is_dead_end(prev_conditions, cur_path) {
|
||||||
return Ok(any_valid);
|
return Ok(any_valid);
|
||||||
}
|
}
|
||||||
forbidden_edges.union(&empty_paths_cache.empty_edges);
|
forbidden_conditions.union(&empty_paths_cache.conditions);
|
||||||
for edge in prev_edges.iter() {
|
for prev_condition in prev_conditions.iter() {
|
||||||
forbidden_edges.union(&empty_paths_cache.empty_couple_edges[*edge as usize]);
|
forbidden_conditions.union(
|
||||||
|
empty_paths_cache.condition_couples.get(Interned::new(*prev_condition)),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
empty_paths_cache.empty_prefixes.final_edges_after_prefix(prev_edges, &mut |x| {
|
empty_paths_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| {
|
||||||
forbidden_edges.insert(x);
|
forbidden_conditions.insert(Interned::new(x));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -107,36 +141,41 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
Ok(any_valid)
|
Ok(any_valid)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn initialize_distances_with_necessary_edges(&self) -> Vec<Vec<(u16, SmallBitmap)>> {
|
pub fn initialize_distances_with_necessary_edges(
|
||||||
let mut distances_to_end: Vec<Vec<(u16, SmallBitmap)>> =
|
&self,
|
||||||
vec![vec![]; self.query_graph.nodes.len()];
|
) -> MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode> {
|
||||||
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len() as u16);
|
let mut distances_to_end = self.query_graph.nodes.map(|_| vec![]);
|
||||||
|
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
|
||||||
|
|
||||||
let mut node_stack = VecDeque::new();
|
let mut node_stack = VecDeque::new();
|
||||||
|
|
||||||
distances_to_end[self.query_graph.end_node as usize] =
|
*distances_to_end.get_mut(self.query_graph.end_node) =
|
||||||
vec![(0, SmallBitmap::new(self.edges_store.len() as u16))];
|
vec![(0, SmallBitmap::for_interned_values_in(&self.conditions_interner))];
|
||||||
|
|
||||||
for prev_node in
|
for prev_node in self.query_graph.nodes.get(self.query_graph.end_node).predecessors.iter() {
|
||||||
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
|
node_stack.push_back(prev_node);
|
||||||
{
|
|
||||||
node_stack.push_back(prev_node as usize);
|
|
||||||
enqueued.insert(prev_node);
|
enqueued.insert(prev_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
while let Some(cur_node) = node_stack.pop_front() {
|
while let Some(cur_node) = node_stack.pop_front() {
|
||||||
let mut self_distances = BTreeMap::<u16, SmallBitmap>::new();
|
let mut self_distances = BTreeMap::<u16, SmallBitmap<G::EdgeCondition>>::new();
|
||||||
|
|
||||||
let cur_node_edges = &self.edges_of_node[cur_node];
|
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||||
for edge_idx in cur_node_edges.iter() {
|
for edge_idx in cur_node_edges.iter() {
|
||||||
let edge = self.edges_store[edge_idx as usize].as_ref().unwrap();
|
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||||
|
let condition = match edge.condition {
|
||||||
|
EdgeCondition::Unconditional => None,
|
||||||
|
EdgeCondition::Conditional(condition) => Some(condition),
|
||||||
|
};
|
||||||
let succ_node = edge.dest_node;
|
let succ_node = edge.dest_node;
|
||||||
let succ_distances = &distances_to_end[succ_node as usize];
|
let succ_distances = distances_to_end.get(succ_node);
|
||||||
for (succ_distance, succ_necessary_edges) in succ_distances {
|
for (succ_distance, succ_necessary_conditions) in succ_distances {
|
||||||
let potential_necessary_edges = SmallBitmap::from_iter(
|
let mut potential_necessary_edges =
|
||||||
std::iter::once(edge_idx).chain(succ_necessary_edges.iter()),
|
SmallBitmap::for_interned_values_in(&self.conditions_interner);
|
||||||
self.edges_store.len() as u16,
|
for condition in condition.into_iter().chain(succ_necessary_conditions.iter()) {
|
||||||
);
|
potential_necessary_edges.insert(condition);
|
||||||
|
}
|
||||||
|
|
||||||
match self_distances.entry(edge.cost as u16 + succ_distance) {
|
match self_distances.entry(edge.cost as u16 + succ_distance) {
|
||||||
Entry::Occupied(mut prev_necessary_edges) => {
|
Entry::Occupied(mut prev_necessary_edges) => {
|
||||||
prev_necessary_edges.get_mut().intersection(&potential_necessary_edges);
|
prev_necessary_edges.get_mut().intersection(&potential_necessary_edges);
|
||||||
@ -147,10 +186,14 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
distances_to_end[cur_node] = self_distances.into_iter().collect();
|
let distances_to_end_cur_node = distances_to_end.get_mut(cur_node);
|
||||||
for prev_node in self.query_graph.edges[cur_node].predecessors.iter() {
|
for (cost, necessary_edges) in self_distances.iter() {
|
||||||
|
distances_to_end_cur_node.push((*cost, necessary_edges.clone()));
|
||||||
|
}
|
||||||
|
*distances_to_end.get_mut(cur_node) = self_distances.into_iter().collect();
|
||||||
|
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
||||||
if !enqueued.contains(prev_node) {
|
if !enqueued.contains(prev_node) {
|
||||||
node_stack.push_back(prev_node as usize);
|
node_stack.push_back(prev_node);
|
||||||
enqueued.insert(prev_node);
|
enqueued.insert(prev_node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9,17 +9,17 @@ use crate::search::new::SearchContext;
|
|||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// A cache storing the document ids associated with each ranking rule edge
|
/// A cache storing the document ids associated with each ranking rule edge
|
||||||
pub struct EdgeConditionsCache<G: RankingRuleGraphTrait> {
|
pub struct EdgeConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
||||||
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
||||||
pub cache: FxHashMap<Interned<G::EdgeCondition>, RoaringBitmap>,
|
pub cache: FxHashMap<Interned<G::EdgeCondition>, RoaringBitmap>,
|
||||||
_phantom: PhantomData<G>,
|
_phantom: PhantomData<G>,
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Default for EdgeConditionsCache<G> {
|
impl<G: RankingRuleGraphTrait> Default for EdgeConditionDocIdsCache<G> {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { cache: Default::default(), _phantom: Default::default() }
|
Self { cache: Default::default(), _phantom: Default::default() }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> EdgeConditionsCache<G> {
|
impl<G: RankingRuleGraphTrait> EdgeConditionDocIdsCache<G> {
|
||||||
/// Retrieve the document ids for the given edge condition.
|
/// Retrieve the document ids for the given edge condition.
|
||||||
///
|
///
|
||||||
/// If the cache does not yet contain these docids, they are computed
|
/// If the cache does not yet contain these docids, they are computed
|
||||||
|
@ -1,59 +1,82 @@
|
|||||||
use super::path_set::PathSet;
|
use super::{path_set::PathSet, RankingRuleGraphTrait};
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::{
|
||||||
|
interner::{FixedSizeInterner, Interned, MappedInterner},
|
||||||
|
small_bitmap::SmallBitmap,
|
||||||
|
};
|
||||||
|
|
||||||
/// A cache which stores sufficient conditions for a path
|
/// A cache which stores sufficient conditions for a path
|
||||||
/// to resolve to an empty set of candidates within the current
|
/// to resolve to an empty set of candidates within the current
|
||||||
/// universe.
|
/// universe.
|
||||||
#[derive(Clone)]
|
pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
|
||||||
pub struct EmptyPathsCache {
|
/// The set of edge conditions that resolve to no documents.
|
||||||
/// The set of edge indexes that resolve to no documents.
|
pub conditions: SmallBitmap<G::EdgeCondition>,
|
||||||
pub empty_edges: SmallBitmap,
|
|
||||||
/// A set of path prefixes that resolve to no documents.
|
/// A set of path prefixes that resolve to no documents.
|
||||||
pub empty_prefixes: PathSet,
|
pub prefixes: PathSet,
|
||||||
/// A set of empty couples of edge indexes that resolve to no documents.
|
/// A set of empty couples of edge conditions that resolve to no documents.
|
||||||
pub empty_couple_edges: Vec<SmallBitmap>,
|
pub condition_couples: MappedInterner<SmallBitmap<G::EdgeCondition>, G::EdgeCondition>,
|
||||||
}
|
}
|
||||||
impl EmptyPathsCache {
|
impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
||||||
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
fn clone(&self) -> Self {
|
||||||
pub fn new(all_edges_len: u16) -> Self {
|
|
||||||
Self {
|
Self {
|
||||||
empty_edges: SmallBitmap::new(all_edges_len),
|
conditions: self.conditions.clone(),
|
||||||
empty_prefixes: PathSet::default(),
|
prefixes: self.prefixes.clone(),
|
||||||
empty_couple_edges: vec![SmallBitmap::new(all_edges_len); all_edges_len as usize],
|
condition_couples: self.condition_couples.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
||||||
|
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
||||||
|
pub fn new(all_edge_conditions: &FixedSizeInterner<G::EdgeCondition>) -> Self {
|
||||||
|
Self {
|
||||||
|
conditions: SmallBitmap::for_interned_values_in(all_edge_conditions),
|
||||||
|
prefixes: PathSet::default(),
|
||||||
|
condition_couples: all_edge_conditions
|
||||||
|
.map(|_| SmallBitmap::for_interned_values_in(all_edge_conditions)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Store in the cache that every path containing the given edge resolves to no documents.
|
/// Store in the cache that every path containing the given edge resolves to no documents.
|
||||||
pub fn forbid_edge(&mut self, edge_idx: u16) {
|
pub fn add_condition(&mut self, condition: Interned<G::EdgeCondition>) {
|
||||||
self.empty_edges.insert(edge_idx);
|
self.conditions.insert(condition);
|
||||||
self.empty_couple_edges[edge_idx as usize].clear();
|
self.condition_couples.get_mut(condition).clear();
|
||||||
self.empty_prefixes.remove_edge(&edge_idx);
|
self.prefixes.remove_edge(condition.into_inner()); // TODO: typed PathSet
|
||||||
for edges2 in self.empty_couple_edges.iter_mut() {
|
for (_, edges2) in self.condition_couples.iter_mut() {
|
||||||
edges2.remove(edge_idx);
|
edges2.remove(condition);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Store in the cache that every path containing the given prefix resolves to no documents.
|
/// Store in the cache that every path containing the given prefix resolves to no documents.
|
||||||
pub fn forbid_prefix(&mut self, prefix: &[u16]) {
|
pub fn add_prefix(&mut self, prefix: &[u16]) {
|
||||||
self.empty_prefixes.insert(prefix.iter().copied());
|
// TODO: typed PathSet
|
||||||
|
self.prefixes.insert(prefix.iter().copied());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Store in the cache that every path containing the two given edges resolves to no documents.
|
/// Store in the cache that every path containing the two given edges resolves to no documents.
|
||||||
pub fn forbid_couple_edges(&mut self, edge1: u16, edge2: u16) {
|
pub fn add_condition_couple(
|
||||||
self.empty_couple_edges[edge1 as usize].insert(edge2);
|
&mut self,
|
||||||
|
edge1: Interned<G::EdgeCondition>,
|
||||||
|
edge2: Interned<G::EdgeCondition>,
|
||||||
|
) {
|
||||||
|
self.condition_couples.get_mut(edge1).insert(edge2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if the cache can determine that the given path resolves to no documents.
|
/// Returns true if the cache can determine that the given path resolves to no documents.
|
||||||
pub fn path_is_empty(&self, path: &[u16], path_bitmap: &SmallBitmap) -> bool {
|
pub fn path_is_dead_end(
|
||||||
if path_bitmap.intersects(&self.empty_edges) {
|
&self,
|
||||||
|
path: &[u16],
|
||||||
|
path_bitmap: &SmallBitmap<G::EdgeCondition>,
|
||||||
|
) -> bool {
|
||||||
|
if path_bitmap.intersects(&self.conditions) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
for edge in path.iter() {
|
for edge in path.iter() {
|
||||||
let forbidden_other_edges = &self.empty_couple_edges[*edge as usize];
|
// TODO: typed path
|
||||||
|
let forbidden_other_edges = self.condition_couples.get(Interned::new(*edge));
|
||||||
if path_bitmap.intersects(forbidden_other_edges) {
|
if path_bitmap.intersects(forbidden_other_edges) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if self.empty_prefixes.contains_prefix_of_path(path) {
|
if self.prefixes.contains_prefix_of_path(path) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
false
|
false
|
||||||
|
@ -18,13 +18,13 @@ mod typo;
|
|||||||
|
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
|
|
||||||
pub use edge_docids_cache::EdgeConditionsCache;
|
pub use edge_docids_cache::EdgeConditionDocIdsCache;
|
||||||
pub use empty_paths_cache::EmptyPathsCache;
|
pub use empty_paths_cache::DeadEndPathCache;
|
||||||
pub use proximity::ProximityGraph;
|
pub use proximity::{ProximityEdge, ProximityGraph};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
pub use typo::TypoGraph;
|
pub use typo::{TypoEdge, TypoGraph};
|
||||||
|
|
||||||
use super::interner::{Interned, Interner};
|
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
use super::{QueryGraph, QueryNode, SearchContext};
|
||||||
@ -63,8 +63,8 @@ impl<E> Clone for EdgeCondition<E> {
|
|||||||
/// 3. The condition associated with it
|
/// 3. The condition associated with it
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Edge<E> {
|
pub struct Edge<E> {
|
||||||
pub source_node: u16,
|
pub source_node: Interned<QueryNode>,
|
||||||
pub dest_node: u16,
|
pub dest_node: Interned<QueryNode>,
|
||||||
pub cost: u8,
|
pub cost: u8,
|
||||||
pub condition: EdgeCondition<E>,
|
pub condition: EdgeCondition<E>,
|
||||||
}
|
}
|
||||||
@ -96,7 +96,7 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
/// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
|
/// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
|
||||||
fn build_edges<'ctx>(
|
fn build_edges<'ctx>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
conditions_interner: &mut Interner<Self::EdgeCondition>,
|
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||||
source_node: &QueryNode,
|
source_node: &QueryNode,
|
||||||
dest_node: &QueryNode,
|
dest_node: &QueryNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
|
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
|
||||||
@ -104,9 +104,9 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &[Vec<(u16, SmallBitmap)>],
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::EdgeCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
);
|
);
|
||||||
@ -118,9 +118,9 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
/// but replacing the edges.
|
/// but replacing the edges.
|
||||||
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
||||||
pub query_graph: QueryGraph,
|
pub query_graph: QueryGraph,
|
||||||
pub edges_store: Vec<Option<Edge<G::EdgeCondition>>>,
|
pub edges_store: FixedSizeInterner<Option<Edge<G::EdgeCondition>>>,
|
||||||
pub edges_of_node: Vec<SmallBitmap>,
|
pub edges_of_node: MappedInterner<SmallBitmap<Option<Edge<G::EdgeCondition>>>, QueryNode>,
|
||||||
pub conditions_interner: Interner<G::EdgeCondition>,
|
pub conditions_interner: FixedSizeInterner<G::EdgeCondition>,
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
@ -133,13 +133,20 @@ impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
/// Remove the given edge from the ranking rule graph
|
/// Remove all edges with the given condition
|
||||||
pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) {
|
pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::EdgeCondition>) {
|
||||||
let edge_opt = &mut self.edges_store[edge_index as usize];
|
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
||||||
let Some(edge) = &edge_opt else { return };
|
let Some(edge) = edge_opt.as_mut() else { continue };
|
||||||
|
match edge.condition {
|
||||||
|
EdgeCondition::Unconditional => continue,
|
||||||
|
EdgeCondition::Conditional(condition) => {
|
||||||
|
if condition == condition_to_remove {
|
||||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||||
*edge_opt = None;
|
*edge_opt = None;
|
||||||
|
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
||||||
self.edges_of_node[source_node as usize].remove(edge_index);
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,10 +27,10 @@ impl PathSet {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn remove_edge(&mut self, forbidden_edge: &u16) {
|
pub fn remove_edge(&mut self, forbidden_edge: u16) {
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i < self.nodes.len() {
|
while i < self.nodes.len() {
|
||||||
let should_remove = if &self.nodes[i].0 == forbidden_edge {
|
let should_remove = if self.nodes[i].0 == forbidden_edge {
|
||||||
true
|
true
|
||||||
} else if !self.nodes[i].1.nodes.is_empty() {
|
} else if !self.nodes[i].1.nodes.is_empty() {
|
||||||
self.nodes[i].1.remove_edge(forbidden_edge);
|
self.nodes[i].1.remove_edge(forbidden_edge);
|
||||||
|
@ -3,7 +3,8 @@ use std::collections::BTreeMap;
|
|||||||
|
|
||||||
use super::ProximityEdge;
|
use super::ProximityEdge;
|
||||||
use crate::search::new::db_cache::DatabaseCache;
|
use crate::search::new::db_cache::DatabaseCache;
|
||||||
use crate::search::new::interner::{Interned, Interner};
|
use crate::search::new::interner::{DedupInterner, Interned};
|
||||||
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
|
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
|
||||||
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
||||||
use crate::search::new::ranking_rule_graph::EdgeCondition;
|
use crate::search::new::ranking_rule_graph::EdgeCondition;
|
||||||
@ -13,7 +14,7 @@ use heed::RoTxn;
|
|||||||
|
|
||||||
fn last_word_of_term_iter<'t>(
|
fn last_word_of_term_iter<'t>(
|
||||||
t: &'t QueryTerm,
|
t: &'t QueryTerm,
|
||||||
phrase_interner: &'t Interner<Phrase>,
|
phrase_interner: &'t DedupInterner<Phrase>,
|
||||||
) -> impl Iterator<Item = (Option<Interned<Phrase>>, Interned<String>)> + 't {
|
) -> impl Iterator<Item = (Option<Interned<Phrase>>, Interned<String>)> + 't {
|
||||||
t.all_single_words_except_prefix_db().map(|w| (None, w)).chain(t.all_phrases().flat_map(
|
t.all_single_words_except_prefix_db().map(|w| (None, w)).chain(t.all_phrases().flat_map(
|
||||||
move |p| {
|
move |p| {
|
||||||
@ -24,7 +25,7 @@ fn last_word_of_term_iter<'t>(
|
|||||||
}
|
}
|
||||||
fn first_word_of_term_iter<'t>(
|
fn first_word_of_term_iter<'t>(
|
||||||
t: &'t QueryTerm,
|
t: &'t QueryTerm,
|
||||||
phrase_interner: &'t Interner<Phrase>,
|
phrase_interner: &'t DedupInterner<Phrase>,
|
||||||
) -> impl Iterator<Item = (Interned<String>, Option<Interned<Phrase>>)> + 't {
|
) -> impl Iterator<Item = (Interned<String>, Option<Interned<Phrase>>)> + 't {
|
||||||
t.all_single_words_except_prefix_db().map(|w| (w, None)).chain(t.all_phrases().flat_map(
|
t.all_single_words_except_prefix_db().map(|w| (w, None)).chain(t.all_phrases().flat_map(
|
||||||
move |p| {
|
move |p| {
|
||||||
@ -36,7 +37,7 @@ fn first_word_of_term_iter<'t>(
|
|||||||
|
|
||||||
pub fn build_edges<'ctx>(
|
pub fn build_edges<'ctx>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
conditions_interner: &mut Interner<ProximityEdge>,
|
conditions_interner: &mut DedupInterner<ProximityEdge>,
|
||||||
from_node: &QueryNode,
|
from_node: &QueryNode,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
|
) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
|
||||||
@ -50,19 +51,19 @@ pub fn build_edges<'ctx>(
|
|||||||
term_docids: _,
|
term_docids: _,
|
||||||
} = ctx;
|
} = ctx;
|
||||||
|
|
||||||
let (left_term, left_end_position) = match from_node {
|
let (left_term, left_end_position) = match &from_node.data {
|
||||||
QueryNode::Term(LocatedQueryTerm { value, positions }) => {
|
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
|
||||||
(term_interner.get(*value), *positions.end())
|
(term_interner.get(*value), *positions.end())
|
||||||
}
|
}
|
||||||
QueryNode::Deleted => return Ok(vec![]),
|
QueryNodeData::Deleted => return Ok(vec![]),
|
||||||
QueryNode::Start => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
QueryNodeData::Start => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||||
QueryNode::End => return Ok(vec![]),
|
QueryNodeData::End => return Ok(vec![]),
|
||||||
};
|
};
|
||||||
|
|
||||||
let right_term = match &to_node {
|
let right_term = match &to_node.data {
|
||||||
QueryNode::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
QueryNodeData::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||||
QueryNode::Deleted | QueryNode::Start => return Ok(vec![]),
|
QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]),
|
||||||
QueryNode::Term(term) => term,
|
QueryNodeData::Term(term) => term,
|
||||||
};
|
};
|
||||||
let LocatedQueryTerm { value: right_value, positions: right_positions } = right_term;
|
let LocatedQueryTerm { value: right_value, positions: right_positions } = right_term;
|
||||||
|
|
||||||
@ -145,7 +146,7 @@ fn add_prefix_edges<'ctx>(
|
|||||||
index: &mut &crate::Index,
|
index: &mut &crate::Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
db_cache: &mut DatabaseCache<'ctx>,
|
||||||
word_interner: &mut Interner<String>,
|
word_interner: &mut DedupInterner<String>,
|
||||||
right_ngram_length: usize,
|
right_ngram_length: usize,
|
||||||
left_word: Interned<String>,
|
left_word: Interned<String>,
|
||||||
right_prefix: Interned<String>,
|
right_prefix: Interned<String>,
|
||||||
@ -207,7 +208,7 @@ fn add_non_prefix_edges<'ctx>(
|
|||||||
index: &mut &crate::Index,
|
index: &mut &crate::Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
db_cache: &mut DatabaseCache<'ctx>,
|
||||||
word_interner: &mut Interner<String>,
|
word_interner: &mut DedupInterner<String>,
|
||||||
right_ngram_length: usize,
|
right_ngram_length: usize,
|
||||||
word1: Interned<String>,
|
word1: Interned<String>,
|
||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
|
@ -3,9 +3,9 @@ pub mod compute_docids;
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::DeadEndPathCache;
|
||||||
use super::{EdgeCondition, RankingRuleGraphTrait};
|
use super::{EdgeCondition, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::{Interned, Interner};
|
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::search::new::query_term::Phrase;
|
use crate::search::new::query_term::Phrase;
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
@ -56,7 +56,7 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
|
|
||||||
fn build_edges<'ctx>(
|
fn build_edges<'ctx>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
conditions_interner: &mut Interner<Self::EdgeCondition>,
|
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||||
source_node: &QueryNode,
|
source_node: &QueryNode,
|
||||||
dest_node: &QueryNode,
|
dest_node: &QueryNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||||
@ -66,19 +66,12 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &super::RankingRuleGraph<Self>,
|
graph: &super::RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &[Vec<(u16, SmallBitmap)>],
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
logger.log_proximity_state(
|
logger.log_proximity_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
||||||
graph,
|
|
||||||
paths,
|
|
||||||
empty_paths_cache,
|
|
||||||
universe,
|
|
||||||
distances.to_vec(),
|
|
||||||
cost,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::DeadEndPathCache;
|
||||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::{Interned, Interner};
|
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||||
@ -55,13 +56,13 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
|
|
||||||
fn build_edges<'ctx>(
|
fn build_edges<'ctx>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
conditions_interner: &mut Interner<Self::EdgeCondition>,
|
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||||
_from_node: &QueryNode,
|
_from_node: &QueryNode,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||||
let SearchContext { term_interner, .. } = ctx;
|
let SearchContext { term_interner, .. } = ctx;
|
||||||
match to_node {
|
match &to_node.data {
|
||||||
QueryNode::Term(LocatedQueryTerm { value, positions }) => {
|
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
|
||||||
let mut edges = vec![];
|
let mut edges = vec![];
|
||||||
// Ngrams have a base typo cost
|
// Ngrams have a base typo cost
|
||||||
// 2-gram -> equivalent to 1 typo
|
// 2-gram -> equivalent to 1 typo
|
||||||
@ -130,20 +131,20 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
}
|
}
|
||||||
Ok(edges)
|
Ok(edges)
|
||||||
}
|
}
|
||||||
QueryNode::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
|
QueryNodeData::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||||
QueryNode::Deleted | QueryNode::Start => panic!(),
|
QueryNodeData::Deleted | QueryNodeData::Start => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &[Vec<(u16, SmallBitmap)>],
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances.to_vec(), cost);
|
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -186,6 +186,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
// anything, just extend the results and go back to the parent ranking rule.
|
// anything, just extend the results and go back to the parent ranking rule.
|
||||||
if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 {
|
if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 {
|
||||||
maybe_add_to_results!(&ranking_rule_universes[cur_ranking_rule_index]);
|
maybe_add_to_results!(&ranking_rule_universes[cur_ranking_rule_index]);
|
||||||
|
ranking_rule_universes[cur_ranking_rule_index].clear();
|
||||||
back!();
|
back!();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -7,11 +7,11 @@ use heed::{BytesDecode, RoTxn};
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::db_cache::DatabaseCache;
|
use super::db_cache::DatabaseCache;
|
||||||
use super::interner::{Interned, Interner};
|
use super::interner::{DedupInterner, Interned};
|
||||||
use super::query_graph::QUERY_GRAPH_NODE_LENGTH_LIMIT;
|
use super::query_graph::QueryNodeData;
|
||||||
use super::query_term::{Phrase, QueryTerm};
|
use super::query_term::{Phrase, QueryTerm};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
use super::{QueryGraph, SearchContext};
|
||||||
use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec};
|
use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec};
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
@ -26,8 +26,8 @@ impl QueryTermDocIdsCache {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
db_cache: &mut DatabaseCache<'ctx>,
|
||||||
word_interner: &Interner<String>,
|
word_interner: &DedupInterner<String>,
|
||||||
phrase_interner: &Interner<Phrase>,
|
phrase_interner: &DedupInterner<Phrase>,
|
||||||
phrase: Interned<Phrase>,
|
phrase: Interned<Phrase>,
|
||||||
) -> Result<&'s RoaringBitmap> {
|
) -> Result<&'s RoaringBitmap> {
|
||||||
if self.phrases.contains_key(&phrase) {
|
if self.phrases.contains_key(&phrase) {
|
||||||
@ -44,9 +44,9 @@ impl QueryTermDocIdsCache {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
db_cache: &mut DatabaseCache<'ctx>,
|
||||||
word_interner: &Interner<String>,
|
word_interner: &DedupInterner<String>,
|
||||||
term_interner: &Interner<QueryTerm>,
|
term_interner: &DedupInterner<QueryTerm>,
|
||||||
phrase_interner: &Interner<Phrase>,
|
phrase_interner: &DedupInterner<Phrase>,
|
||||||
term_interned: Interned<QueryTerm>,
|
term_interned: Interned<QueryTerm>,
|
||||||
) -> Result<&'s RoaringBitmap> {
|
) -> Result<&'s RoaringBitmap> {
|
||||||
if self.terms.contains_key(&term_interned) {
|
if self.terms.contains_key(&term_interned) {
|
||||||
@ -105,28 +105,27 @@ pub fn resolve_query_graph<'ctx>(
|
|||||||
// TODO: there is a faster way to compute this big
|
// TODO: there is a faster way to compute this big
|
||||||
// roaring bitmap expression
|
// roaring bitmap expression
|
||||||
|
|
||||||
let mut nodes_resolved = SmallBitmap::new(QUERY_GRAPH_NODE_LENGTH_LIMIT);
|
let mut nodes_resolved = SmallBitmap::for_interned_values_in(&q.nodes);
|
||||||
let mut path_nodes_docids = vec![RoaringBitmap::new(); q.nodes.len()];
|
let mut path_nodes_docids = q.nodes.map(|_| RoaringBitmap::new());
|
||||||
|
|
||||||
let mut next_nodes_to_visit = VecDeque::new();
|
let mut next_nodes_to_visit = VecDeque::new();
|
||||||
next_nodes_to_visit.push_back(q.root_node);
|
next_nodes_to_visit.push_back(q.root_node);
|
||||||
|
|
||||||
while let Some(node) = next_nodes_to_visit.pop_front() {
|
while let Some(node_id) = next_nodes_to_visit.pop_front() {
|
||||||
let predecessors = &q.edges[node as usize].predecessors;
|
let node = q.nodes.get(node_id);
|
||||||
|
let predecessors = &node.predecessors;
|
||||||
if !predecessors.is_subset(&nodes_resolved) {
|
if !predecessors.is_subset(&nodes_resolved) {
|
||||||
next_nodes_to_visit.push_back(node);
|
next_nodes_to_visit.push_back(node_id);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Take union of all predecessors
|
// Take union of all predecessors
|
||||||
let mut predecessors_docids = RoaringBitmap::new();
|
let mut predecessors_docids = RoaringBitmap::new();
|
||||||
for p in predecessors.iter() {
|
for p in predecessors.iter() {
|
||||||
predecessors_docids |= &path_nodes_docids[p as usize];
|
predecessors_docids |= path_nodes_docids.get(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
let n = &q.nodes[node as usize];
|
let node_docids = match &node.data {
|
||||||
|
QueryNodeData::Term(located_term) => {
|
||||||
let node_docids = match n {
|
|
||||||
QueryNode::Term(located_term) => {
|
|
||||||
let term_docids = query_term_docids.get_query_term_docids(
|
let term_docids = query_term_docids.get_query_term_docids(
|
||||||
index,
|
index,
|
||||||
txn,
|
txn,
|
||||||
@ -138,26 +137,26 @@ pub fn resolve_query_graph<'ctx>(
|
|||||||
)?;
|
)?;
|
||||||
predecessors_docids & term_docids
|
predecessors_docids & term_docids
|
||||||
}
|
}
|
||||||
QueryNode::Deleted => {
|
QueryNodeData::Deleted => {
|
||||||
panic!()
|
panic!()
|
||||||
}
|
}
|
||||||
QueryNode::Start => universe.clone(),
|
QueryNodeData::Start => universe.clone(),
|
||||||
QueryNode::End => {
|
QueryNodeData::End => {
|
||||||
return Ok(predecessors_docids);
|
return Ok(predecessors_docids);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
nodes_resolved.insert(node);
|
nodes_resolved.insert(node_id);
|
||||||
path_nodes_docids[node as usize] = node_docids;
|
*path_nodes_docids.get_mut(node_id) = node_docids;
|
||||||
|
|
||||||
for succ in q.edges[node as usize].successors.iter() {
|
for succ in node.successors.iter() {
|
||||||
if !next_nodes_to_visit.contains(&succ) && !nodes_resolved.contains(succ) {
|
if !next_nodes_to_visit.contains(&succ) && !nodes_resolved.contains(succ) {
|
||||||
next_nodes_to_visit.push_back(succ);
|
next_nodes_to_visit.push_back(succ);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for prec in q.edges[node as usize].predecessors.iter() {
|
for prec in node.predecessors.iter() {
|
||||||
if q.edges[prec as usize].successors.is_subset(&nodes_resolved) {
|
if q.nodes.get(prec).successors.is_subset(&nodes_resolved) {
|
||||||
path_nodes_docids[prec as usize].clear();
|
path_nodes_docids.get_mut(prec).clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -168,8 +167,8 @@ pub fn resolve_phrase<'ctx>(
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
db_cache: &mut DatabaseCache<'ctx>,
|
||||||
word_interner: &Interner<String>,
|
word_interner: &DedupInterner<String>,
|
||||||
phrase_interner: &Interner<Phrase>,
|
phrase_interner: &DedupInterner<Phrase>,
|
||||||
phrase: Interned<Phrase>,
|
phrase: Interned<Phrase>,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let Phrase { words } = phrase_interner.get(phrase).clone();
|
let Phrase { words } = phrase_interner.get(phrase).clone();
|
||||||
|
@ -1,9 +1,85 @@
|
|||||||
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
use super::interner::{FixedSizeInterner, Interned};
|
||||||
|
|
||||||
|
pub struct SmallBitmap<T> {
|
||||||
|
internal: SmallBitmapInternal,
|
||||||
|
_phantom: PhantomData<T>,
|
||||||
|
}
|
||||||
|
impl<T> Clone for SmallBitmap<T> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self { internal: self.internal.clone(), _phantom: PhantomData }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<T> SmallBitmap<T> {
|
||||||
|
pub fn for_interned_values_in(interner: &FixedSizeInterner<T>) -> Self {
|
||||||
|
Self::new(interner.len())
|
||||||
|
}
|
||||||
|
pub fn new(universe_length: u16) -> Self {
|
||||||
|
if universe_length <= 64 {
|
||||||
|
Self { internal: SmallBitmapInternal::Tiny(0), _phantom: PhantomData }
|
||||||
|
} else {
|
||||||
|
Self {
|
||||||
|
internal: SmallBitmapInternal::Small(
|
||||||
|
vec![0; 1 + universe_length as usize / 64].into_boxed_slice(),
|
||||||
|
),
|
||||||
|
_phantom: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn from_iter(
|
||||||
|
xs: impl Iterator<Item = Interned<T>>,
|
||||||
|
for_interner: &FixedSizeInterner<T>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
internal: SmallBitmapInternal::from_iter(
|
||||||
|
xs.map(|x| x.into_inner()),
|
||||||
|
for_interner.len(),
|
||||||
|
),
|
||||||
|
_phantom: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.internal.is_empty()
|
||||||
|
}
|
||||||
|
pub fn clear(&mut self) {
|
||||||
|
self.internal.clear()
|
||||||
|
}
|
||||||
|
pub fn contains(&self, x: Interned<T>) -> bool {
|
||||||
|
self.internal.contains(x.into_inner())
|
||||||
|
}
|
||||||
|
pub fn insert(&mut self, x: Interned<T>) {
|
||||||
|
self.internal.insert(x.into_inner())
|
||||||
|
}
|
||||||
|
pub fn remove(&mut self, x: Interned<T>) {
|
||||||
|
self.internal.remove(x.into_inner())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn intersection(&mut self, other: &Self) {
|
||||||
|
self.internal.intersection(&other.internal)
|
||||||
|
}
|
||||||
|
pub fn union(&mut self, other: &Self) {
|
||||||
|
self.internal.union(&other.internal)
|
||||||
|
}
|
||||||
|
pub fn subtract(&mut self, other: &Self) {
|
||||||
|
self.internal.subtract(&other.internal)
|
||||||
|
}
|
||||||
|
pub fn is_subset(&self, other: &Self) -> bool {
|
||||||
|
self.internal.is_subset(&other.internal)
|
||||||
|
}
|
||||||
|
pub fn intersects(&self, other: &Self) -> bool {
|
||||||
|
self.internal.intersects(&other.internal)
|
||||||
|
}
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = Interned<T>> + '_ {
|
||||||
|
self.internal.iter().map(|x| Interned::new(x))
|
||||||
|
}
|
||||||
|
}
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub enum SmallBitmap {
|
pub enum SmallBitmapInternal {
|
||||||
Tiny(u64),
|
Tiny(u64),
|
||||||
Small(Box<[u64]>),
|
Small(Box<[u64]>),
|
||||||
}
|
}
|
||||||
impl SmallBitmap {
|
impl SmallBitmapInternal {
|
||||||
pub fn new(universe_length: u16) -> Self {
|
pub fn new(universe_length: u16) -> Self {
|
||||||
if universe_length <= 64 {
|
if universe_length <= 64 {
|
||||||
Self::Tiny(0)
|
Self::Tiny(0)
|
||||||
@ -20,8 +96,8 @@ impl SmallBitmap {
|
|||||||
}
|
}
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
SmallBitmap::Tiny(set) => *set == 0,
|
SmallBitmapInternal::Tiny(set) => *set == 0,
|
||||||
SmallBitmap::Small(sets) => {
|
SmallBitmapInternal::Small(sets) => {
|
||||||
for set in sets.iter() {
|
for set in sets.iter() {
|
||||||
if *set != 0 {
|
if *set != 0 {
|
||||||
return false;
|
return false;
|
||||||
@ -33,8 +109,8 @@ impl SmallBitmap {
|
|||||||
}
|
}
|
||||||
pub fn clear(&mut self) {
|
pub fn clear(&mut self) {
|
||||||
match self {
|
match self {
|
||||||
SmallBitmap::Tiny(set) => *set = 0,
|
SmallBitmapInternal::Tiny(set) => *set = 0,
|
||||||
SmallBitmap::Small(sets) => {
|
SmallBitmapInternal::Small(sets) => {
|
||||||
for set in sets.iter_mut() {
|
for set in sets.iter_mut() {
|
||||||
*set = 0;
|
*set = 0;
|
||||||
}
|
}
|
||||||
@ -43,8 +119,8 @@ impl SmallBitmap {
|
|||||||
}
|
}
|
||||||
pub fn contains(&self, mut x: u16) -> bool {
|
pub fn contains(&self, mut x: u16) -> bool {
|
||||||
let set = match self {
|
let set = match self {
|
||||||
SmallBitmap::Tiny(set) => *set,
|
SmallBitmapInternal::Tiny(set) => *set,
|
||||||
SmallBitmap::Small(set) => {
|
SmallBitmapInternal::Small(set) => {
|
||||||
let idx = x / 64;
|
let idx = x / 64;
|
||||||
x %= 64;
|
x %= 64;
|
||||||
set[idx as usize]
|
set[idx as usize]
|
||||||
@ -54,8 +130,8 @@ impl SmallBitmap {
|
|||||||
}
|
}
|
||||||
pub fn insert(&mut self, mut x: u16) {
|
pub fn insert(&mut self, mut x: u16) {
|
||||||
let set = match self {
|
let set = match self {
|
||||||
SmallBitmap::Tiny(set) => set,
|
SmallBitmapInternal::Tiny(set) => set,
|
||||||
SmallBitmap::Small(set) => {
|
SmallBitmapInternal::Small(set) => {
|
||||||
let idx = x / 64;
|
let idx = x / 64;
|
||||||
x %= 64;
|
x %= 64;
|
||||||
&mut set[idx as usize]
|
&mut set[idx as usize]
|
||||||
@ -65,8 +141,8 @@ impl SmallBitmap {
|
|||||||
}
|
}
|
||||||
pub fn remove(&mut self, mut x: u16) {
|
pub fn remove(&mut self, mut x: u16) {
|
||||||
let set = match self {
|
let set = match self {
|
||||||
SmallBitmap::Tiny(set) => set,
|
SmallBitmapInternal::Tiny(set) => set,
|
||||||
SmallBitmap::Small(set) => {
|
SmallBitmapInternal::Small(set) => {
|
||||||
let idx = x / 64;
|
let idx = x / 64;
|
||||||
x %= 64;
|
x %= 64;
|
||||||
&mut set[idx as usize]
|
&mut set[idx as usize]
|
||||||
@ -75,20 +151,20 @@ impl SmallBitmap {
|
|||||||
*set &= !(0b1 << x);
|
*set &= !(0b1 << x);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn intersection(&mut self, other: &SmallBitmap) {
|
pub fn intersection(&mut self, other: &SmallBitmapInternal) {
|
||||||
self.apply_op(other, |a, b| *a &= b);
|
self.apply_op(other, |a, b| *a &= b);
|
||||||
}
|
}
|
||||||
pub fn union(&mut self, other: &SmallBitmap) {
|
pub fn union(&mut self, other: &SmallBitmapInternal) {
|
||||||
self.apply_op(other, |a, b| *a |= b);
|
self.apply_op(other, |a, b| *a |= b);
|
||||||
}
|
}
|
||||||
pub fn subtract(&mut self, other: &SmallBitmap) {
|
pub fn subtract(&mut self, other: &SmallBitmapInternal) {
|
||||||
self.apply_op(other, |a, b| *a &= !b);
|
self.apply_op(other, |a, b| *a &= !b);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn apply_op(&mut self, other: &SmallBitmap, op: impl Fn(&mut u64, u64)) {
|
pub fn apply_op(&mut self, other: &SmallBitmapInternal, op: impl Fn(&mut u64, u64)) {
|
||||||
match (self, other) {
|
match (self, other) {
|
||||||
(SmallBitmap::Tiny(a), SmallBitmap::Tiny(b)) => op(a, *b),
|
(SmallBitmapInternal::Tiny(a), SmallBitmapInternal::Tiny(b)) => op(a, *b),
|
||||||
(SmallBitmap::Small(a), SmallBitmap::Small(b)) => {
|
(SmallBitmapInternal::Small(a), SmallBitmapInternal::Small(b)) => {
|
||||||
assert!(a.len() == b.len(),);
|
assert!(a.len() == b.len(),);
|
||||||
for (a, b) in a.iter_mut().zip(b.iter()) {
|
for (a, b) in a.iter_mut().zip(b.iter()) {
|
||||||
op(a, *b);
|
op(a, *b);
|
||||||
@ -99,10 +175,14 @@ impl SmallBitmap {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn all_satisfy_op(&self, other: &SmallBitmap, op: impl Fn(u64, u64) -> bool) -> bool {
|
pub fn all_satisfy_op(
|
||||||
|
&self,
|
||||||
|
other: &SmallBitmapInternal,
|
||||||
|
op: impl Fn(u64, u64) -> bool,
|
||||||
|
) -> bool {
|
||||||
match (self, other) {
|
match (self, other) {
|
||||||
(SmallBitmap::Tiny(a), SmallBitmap::Tiny(b)) => op(*a, *b),
|
(SmallBitmapInternal::Tiny(a), SmallBitmapInternal::Tiny(b)) => op(*a, *b),
|
||||||
(SmallBitmap::Small(a), SmallBitmap::Small(b)) => {
|
(SmallBitmapInternal::Small(a), SmallBitmapInternal::Small(b)) => {
|
||||||
assert!(a.len() == b.len());
|
assert!(a.len() == b.len());
|
||||||
for (a, b) in a.iter().zip(b.iter()) {
|
for (a, b) in a.iter().zip(b.iter()) {
|
||||||
if !op(*a, *b) {
|
if !op(*a, *b) {
|
||||||
@ -116,10 +196,14 @@ impl SmallBitmap {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn any_satisfy_op(&self, other: &SmallBitmap, op: impl Fn(u64, u64) -> bool) -> bool {
|
pub fn any_satisfy_op(
|
||||||
|
&self,
|
||||||
|
other: &SmallBitmapInternal,
|
||||||
|
op: impl Fn(u64, u64) -> bool,
|
||||||
|
) -> bool {
|
||||||
match (self, other) {
|
match (self, other) {
|
||||||
(SmallBitmap::Tiny(a), SmallBitmap::Tiny(b)) => op(*a, *b),
|
(SmallBitmapInternal::Tiny(a), SmallBitmapInternal::Tiny(b)) => op(*a, *b),
|
||||||
(SmallBitmap::Small(a), SmallBitmap::Small(b)) => {
|
(SmallBitmapInternal::Small(a), SmallBitmapInternal::Small(b)) => {
|
||||||
assert!(a.len() == b.len());
|
assert!(a.len() == b.len());
|
||||||
for (a, b) in a.iter().zip(b.iter()) {
|
for (a, b) in a.iter().zip(b.iter()) {
|
||||||
if op(*a, *b) {
|
if op(*a, *b) {
|
||||||
@ -133,32 +217,32 @@ impl SmallBitmap {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn is_subset(&self, other: &SmallBitmap) -> bool {
|
pub fn is_subset(&self, other: &SmallBitmapInternal) -> bool {
|
||||||
self.all_satisfy_op(other, |a, b| a & !b == 0)
|
self.all_satisfy_op(other, |a, b| a & !b == 0)
|
||||||
}
|
}
|
||||||
pub fn intersects(&self, other: &SmallBitmap) -> bool {
|
pub fn intersects(&self, other: &SmallBitmapInternal) -> bool {
|
||||||
self.any_satisfy_op(other, |a, b| a & b != 0)
|
self.any_satisfy_op(other, |a, b| a & b != 0)
|
||||||
}
|
}
|
||||||
pub fn iter(&self) -> SmallBitmapIter<'_> {
|
pub fn iter(&self) -> SmallBitmapInternalIter<'_> {
|
||||||
match self {
|
match self {
|
||||||
SmallBitmap::Tiny(x) => SmallBitmapIter::Tiny(*x),
|
SmallBitmapInternal::Tiny(x) => SmallBitmapInternalIter::Tiny(*x),
|
||||||
SmallBitmap::Small(xs) => {
|
SmallBitmapInternal::Small(xs) => {
|
||||||
SmallBitmapIter::Small { cur: xs[0], next: &xs[1..], base: 0 }
|
SmallBitmapInternalIter::Small { cur: xs[0], next: &xs[1..], base: 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum SmallBitmapIter<'b> {
|
pub enum SmallBitmapInternalIter<'b> {
|
||||||
Tiny(u64),
|
Tiny(u64),
|
||||||
Small { cur: u64, next: &'b [u64], base: u16 },
|
Small { cur: u64, next: &'b [u64], base: u16 },
|
||||||
}
|
}
|
||||||
impl<'b> Iterator for SmallBitmapIter<'b> {
|
impl<'b> Iterator for SmallBitmapInternalIter<'b> {
|
||||||
type Item = u16;
|
type Item = u16;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
match self {
|
match self {
|
||||||
SmallBitmapIter::Tiny(set) => {
|
SmallBitmapInternalIter::Tiny(set) => {
|
||||||
if *set > 0 {
|
if *set > 0 {
|
||||||
let idx = set.trailing_zeros() as u16;
|
let idx = set.trailing_zeros() as u16;
|
||||||
*set &= *set - 1;
|
*set &= *set - 1;
|
||||||
@ -167,7 +251,7 @@ impl<'b> Iterator for SmallBitmapIter<'b> {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
SmallBitmapIter::Small { cur, next, base } => {
|
SmallBitmapInternalIter::Small { cur, next, base } => {
|
||||||
if *cur > 0 {
|
if *cur > 0 {
|
||||||
let idx = cur.trailing_zeros() as u16;
|
let idx = cur.trailing_zeros() as u16;
|
||||||
*cur &= *cur - 1;
|
*cur &= *cur - 1;
|
||||||
@ -185,23 +269,23 @@ impl<'b> Iterator for SmallBitmapIter<'b> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
// #[cfg(test)]
|
||||||
mod tests {
|
// mod tests {
|
||||||
use super::SmallBitmap;
|
// use super::SmallBitmap;
|
||||||
|
|
||||||
#[test]
|
// #[test]
|
||||||
fn test_small_bitmap() {
|
// fn test_small_bitmap() {
|
||||||
let mut bitmap1 = SmallBitmap::new(32);
|
// let mut bitmap1 = SmallBitmap::new(32);
|
||||||
for x in 0..16 {
|
// for x in 0..16 {
|
||||||
bitmap1.insert(x * 2);
|
// bitmap1.insert(x * 2);
|
||||||
}
|
// }
|
||||||
let mut bitmap2 = SmallBitmap::new(32);
|
// let mut bitmap2 = SmallBitmap::new(32);
|
||||||
for x in 0..=10 {
|
// for x in 0..=10 {
|
||||||
bitmap2.insert(x * 3);
|
// bitmap2.insert(x * 3);
|
||||||
}
|
// }
|
||||||
bitmap1.intersection(&bitmap2);
|
// bitmap1.intersection(&bitmap2);
|
||||||
for v in bitmap1.iter() {
|
// for v in bitmap1.iter() {
|
||||||
println!("{v}");
|
// println!("{v}");
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
@ -3,8 +3,9 @@ use std::collections::BTreeSet;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
|
use super::query_graph::QueryNodeData;
|
||||||
use super::resolve_query_graph::resolve_query_graph;
|
use super::resolve_query_graph::resolve_query_graph;
|
||||||
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext};
|
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||||
use crate::{Result, TermsMatchingStrategy};
|
use crate::{Result, TermsMatchingStrategy};
|
||||||
|
|
||||||
pub struct Words {
|
pub struct Words {
|
||||||
@ -43,12 +44,12 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
|
|||||||
let positions_to_remove = match self.terms_matching_strategy {
|
let positions_to_remove = match self.terms_matching_strategy {
|
||||||
TermsMatchingStrategy::Last => {
|
TermsMatchingStrategy::Last => {
|
||||||
let mut all_positions = BTreeSet::new();
|
let mut all_positions = BTreeSet::new();
|
||||||
for n in parent_query_graph.nodes.iter() {
|
for (_, n) in parent_query_graph.nodes.iter() {
|
||||||
match n {
|
match &n.data {
|
||||||
QueryNode::Term(term) => {
|
QueryNodeData::Term(term) => {
|
||||||
all_positions.extend(term.positions.clone().into_iter());
|
all_positions.extend(term.positions.clone().into_iter());
|
||||||
}
|
}
|
||||||
QueryNode::Deleted | QueryNode::Start | QueryNode::End => {}
|
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut r: Vec<i8> = all_positions.into_iter().collect();
|
let mut r: Vec<i8> = all_positions.into_iter().collect();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user