2023-03-08 15:04:25 +01:00
|
|
|
/*! Module implementing the graph used for the graph-based ranking rules
and its related algorithms.

A ranking rule graph is built on top of the [`QueryGraph`]: the nodes stay
the same but the edges are replaced.
*/
|
|
|
|
|
2023-03-06 08:35:01 +01:00
|
|
|
mod build;
|
|
|
|
mod cheapest_paths;
|
2023-03-16 11:52:51 +01:00
|
|
|
mod condition_docids_cache;
|
2023-03-19 14:30:19 +01:00
|
|
|
mod dead_ends_cache;
|
2023-03-08 15:04:25 +01:00
|
|
|
|
|
|
|
/// Implementation of the `proximity` ranking rule
|
2023-03-06 08:35:01 +01:00
|
|
|
mod proximity;
|
2023-03-08 15:04:25 +01:00
|
|
|
/// Implementation of the `typo` ranking rule
|
2023-03-06 08:35:01 +01:00
|
|
|
mod typo;
|
|
|
|
|
2023-03-15 16:08:43 +01:00
|
|
|
use std::collections::HashSet;
|
2023-03-13 12:46:32 +01:00
|
|
|
use std::hash::Hash;
|
|
|
|
|
2023-03-16 11:49:23 +01:00
|
|
|
pub use condition_docids_cache::ConditionDocIdsCache;
|
2023-03-19 14:30:19 +01:00
|
|
|
pub use dead_ends_cache::DeadEndsCache;
|
2023-03-15 12:52:40 +01:00
|
|
|
pub use proximity::{ProximityCondition, ProximityGraph};
|
2023-02-21 09:46:00 +01:00
|
|
|
use roaring::RoaringBitmap;
|
2023-03-16 11:49:23 +01:00
|
|
|
pub use typo::{TypoCondition, TypoGraph};
|
2023-02-21 09:46:00 +01:00
|
|
|
|
2023-03-14 16:37:47 +01:00
|
|
|
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
|
2023-03-08 09:55:53 +01:00
|
|
|
use super::logger::SearchLogger;
|
2023-03-15 16:08:43 +01:00
|
|
|
use super::query_term::Phrase;
|
2023-03-08 09:55:53 +01:00
|
|
|
use super::small_bitmap::SmallBitmap;
|
|
|
|
use super::{QueryGraph, QueryNode, SearchContext};
|
|
|
|
use crate::Result;
|
|
|
|
|
2023-03-08 15:04:25 +01:00
|
|
|
/// An edge in the ranking rule graph.
|
|
|
|
///
|
|
|
|
/// It contains:
|
|
|
|
/// 1. The source and destination nodes
|
|
|
|
/// 2. The cost of traversing this edge
|
|
|
|
/// 3. The condition associated with it
|
2023-03-13 12:46:32 +01:00
|
|
|
#[derive(Clone)]
|
2023-02-21 09:46:00 +01:00
|
|
|
pub struct Edge<E> {
|
2023-03-14 16:37:47 +01:00
|
|
|
pub source_node: Interned<QueryNode>,
|
|
|
|
pub dest_node: Interned<QueryNode>,
|
2023-02-23 13:13:19 +01:00
|
|
|
pub cost: u8,
|
2023-03-16 11:52:51 +01:00
|
|
|
pub condition: Option<Interned<E>>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<E> Hash for Edge<E> {
|
|
|
|
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
|
|
|
self.source_node.hash(state);
|
|
|
|
self.dest_node.hash(state);
|
|
|
|
self.cost.hash(state);
|
|
|
|
self.condition.hash(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Marker impl, written by hand with no bound on `E`: it declares that the
// manual `PartialEq` implementation for `Edge` is a full equivalence relation.
impl<E> Eq for Edge<E> {}
|
|
|
|
|
|
|
|
impl<E> PartialEq for Edge<E> {
|
|
|
|
fn eq(&self, other: &Self) -> bool {
|
|
|
|
self.source_node == other.source_node
|
|
|
|
&& self.dest_node == other.dest_node
|
|
|
|
&& self.cost == other.cost
|
|
|
|
&& self.condition == other.condition
|
|
|
|
}
|
2023-02-21 09:46:00 +01:00
|
|
|
}
|
|
|
|
|
2023-03-08 15:04:25 +01:00
|
|
|
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
|
|
|
|
///
|
|
|
|
/// It mostly describes how to:
|
|
|
|
/// 1. Retrieve the set of edges (their cost and condition) between two nodes.
|
|
|
|
/// 2. Compute the document ids satisfying a condition
|
2023-02-23 13:13:19 +01:00
|
|
|
pub trait RankingRuleGraphTrait: Sized {
|
2023-03-08 15:04:25 +01:00
|
|
|
/// The condition of an edge connecting two query nodes. The condition
|
2023-02-21 12:33:32 +01:00
|
|
|
/// should be sufficient to compute the edge's cost and associated document ids
|
2023-03-16 11:49:23 +01:00
|
|
|
/// in [`resolve_condition`](RankingRuleGraphTrait::resolve_condition).
|
|
|
|
type Condition: Sized + Clone + PartialEq + Eq + Hash;
|
2023-02-21 12:33:32 +01:00
|
|
|
|
2023-03-08 15:04:25 +01:00
|
|
|
/// Return the label of the given edge condition, to be used when visualising
|
|
|
|
/// the ranking rule graph.
|
2023-03-16 11:49:23 +01:00
|
|
|
fn label_for_condition<'ctx>(
|
2023-03-15 16:08:43 +01:00
|
|
|
ctx: &mut SearchContext<'ctx>,
|
2023-03-16 11:49:23 +01:00
|
|
|
condition: &Self::Condition,
|
2023-03-15 16:08:43 +01:00
|
|
|
) -> Result<String>;
|
|
|
|
|
2023-03-16 11:49:23 +01:00
|
|
|
fn words_used_by_condition<'ctx>(
|
2023-03-15 16:08:43 +01:00
|
|
|
ctx: &mut SearchContext<'ctx>,
|
2023-03-16 11:49:23 +01:00
|
|
|
condition: &Self::Condition,
|
2023-03-15 16:08:43 +01:00
|
|
|
) -> Result<HashSet<Interned<String>>>;
|
2023-03-16 11:49:23 +01:00
|
|
|
|
|
|
|
fn phrases_used_by_condition<'ctx>(
|
2023-03-15 16:08:43 +01:00
|
|
|
ctx: &mut SearchContext<'ctx>,
|
2023-03-16 11:49:23 +01:00
|
|
|
condition: &Self::Condition,
|
2023-03-15 16:08:43 +01:00
|
|
|
) -> Result<HashSet<Interned<Phrase>>>;
|
2023-02-21 09:46:00 +01:00
|
|
|
|
2023-03-08 15:04:25 +01:00
|
|
|
/// Compute the document ids associated with the given edge condition,
|
|
|
|
/// restricted to the given universe.
|
2023-03-16 11:49:23 +01:00
|
|
|
fn resolve_condition<'ctx>(
|
2023-03-13 14:03:48 +01:00
|
|
|
ctx: &mut SearchContext<'ctx>,
|
2023-03-16 11:49:23 +01:00
|
|
|
condition: &Self::Condition,
|
2023-03-07 14:42:58 +01:00
|
|
|
universe: &RoaringBitmap,
|
2023-02-21 09:46:00 +01:00
|
|
|
) -> Result<RoaringBitmap>;
|
|
|
|
|
2023-03-16 11:49:23 +01:00
|
|
|
/// Return the costs and conditions of the edges going from the source node to the destination node
|
2023-03-14 10:54:55 +01:00
|
|
|
fn build_edges<'ctx>(
|
2023-03-13 14:03:48 +01:00
|
|
|
ctx: &mut SearchContext<'ctx>,
|
2023-03-16 11:49:23 +01:00
|
|
|
conditions_interner: &mut DedupInterner<Self::Condition>,
|
2023-03-14 10:54:55 +01:00
|
|
|
source_node: &QueryNode,
|
2023-03-08 15:04:25 +01:00
|
|
|
dest_node: &QueryNode,
|
2023-03-16 11:49:23 +01:00
|
|
|
) -> Result<Vec<(u8, Option<Interned<Self::Condition>>)>>;
|
2023-02-23 13:13:19 +01:00
|
|
|
|
|
|
|
fn log_state(
|
|
|
|
graph: &RankingRuleGraph<Self>,
|
2023-03-16 11:49:23 +01:00
|
|
|
paths: &[Vec<Interned<Self::Condition>>],
|
2023-03-19 15:03:57 +01:00
|
|
|
dead_ends_cache: &DeadEndsCache<Self::Condition>,
|
2023-03-02 21:27:42 +01:00
|
|
|
universe: &RoaringBitmap,
|
2023-03-19 15:03:57 +01:00
|
|
|
distances: &MappedInterner<Vec<u16>, QueryNode>,
|
2023-03-07 14:42:58 +01:00
|
|
|
cost: u16,
|
2023-02-23 13:13:19 +01:00
|
|
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
|
|
|
);
|
2023-02-21 09:46:00 +01:00
|
|
|
}
|
|
|
|
|
2023-03-08 15:04:25 +01:00
|
|
|
/// The graph used by graph-based ranking rules.
|
|
|
|
///
|
|
|
|
/// It is built on top of a [`QueryGraph`], keeping the same nodes
|
|
|
|
/// but replacing the edges.
|
2023-02-21 09:46:00 +01:00
|
|
|
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
|
|
|
pub query_graph: QueryGraph,
|
2023-03-16 11:49:23 +01:00
|
|
|
pub edges_store: FixedSizeInterner<Option<Edge<G::Condition>>>,
|
|
|
|
pub edges_of_node: MappedInterner<SmallBitmap<Option<Edge<G::Condition>>>, QueryNode>,
|
|
|
|
pub conditions_interner: FixedSizeInterner<G::Condition>,
|
2023-02-21 09:46:00 +01:00
|
|
|
}
|
2023-03-13 14:03:48 +01:00
|
|
|
/// Cloning is written out by hand, bounded only by `G: RankingRuleGraphTrait`
/// (there is no `G: Clone` requirement): each field is cloned in turn.
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
    fn clone(&self) -> Self {
        // Exhaustive destructuring: adding a field to the struct forces this
        // function to be revisited.
        let Self { query_graph, edges_store, edges_of_node, conditions_interner } = self;
        Self {
            query_graph: query_graph.clone(),
            edges_store: edges_store.clone(),
            edges_of_node: edges_of_node.clone(),
            conditions_interner: conditions_interner.clone(),
        }
    }
}
|
2023-02-21 09:46:00 +01:00
|
|
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
2023-03-14 16:37:47 +01:00
|
|
|
/// Remove all edges with the given condition
|
2023-03-16 11:49:23 +01:00
|
|
|
pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::Condition>) {
|
2023-03-14 16:37:47 +01:00
|
|
|
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
|
|
|
let Some(edge) = edge_opt.as_mut() else { continue };
|
2023-03-16 11:52:51 +01:00
|
|
|
let Some(condition) = edge.condition else { continue };
|
|
|
|
|
|
|
|
if condition == condition_to_remove {
|
|
|
|
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
|
|
|
*edge_opt = None;
|
|
|
|
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
2023-03-14 16:37:47 +01:00
|
|
|
}
|
|
|
|
}
|
2023-02-21 09:46:00 +01:00
|
|
|
}
|
|
|
|
}
|