Speed up graph-based ranking rule when a lot of different costs exist

This commit is contained in:
Loïc Lecrenier 2023-05-01 15:33:28 +02:00
parent 3b2c8b9f25
commit 30fb1153cc
3 changed files with 35 additions and 46 deletions

View File

@ -309,11 +309,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
Ok(ControlFlow::Continue(())) Ok(ControlFlow::Continue(()))
} }
})?; })?;
// if at_least_one {
// unsafe {
// println!("\n===== {id} COST: {cost} ==== PATHS: {COUNT_PATHS} ==== NODES: {COUNT_VISITED_NODES} ===== UNIVERSE: {universe}", id=self.id, universe=universe.len());
// }
// }
logger.log_internal_state(graph); logger.log_internal_state(graph);
logger.log_internal_state(&good_paths); logger.log_internal_state(&good_paths);
@ -337,8 +332,14 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let next_query_graph = QueryGraph::build_from_paths(paths); let next_query_graph = QueryGraph::build_from_paths(paths);
if !nodes_with_removed_outgoing_conditions.is_empty() { #[allow(clippy::comparison_chain)]
graph.update_all_costs_before_nodes(&nodes_with_removed_outgoing_conditions, all_costs); if nodes_with_removed_outgoing_conditions.len() == 1 {
graph.update_all_costs_before_node(
*nodes_with_removed_outgoing_conditions.first().unwrap(),
all_costs,
);
} else if nodes_with_removed_outgoing_conditions.len() > 1 {
*all_costs = graph.find_all_costs_to_end();
} }
self.state = Some(state); self.state = Some(state);

View File

@ -8,7 +8,6 @@ use crate::search::new::interner::Interner;
use crate::Result; use crate::Result;
use fxhash::{FxHashMap, FxHasher}; use fxhash::{FxHashMap, FxHasher};
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
@ -364,8 +363,6 @@ impl QueryGraph {
b2 c2 d e2 b2 c2 d e2
``` ```
But we accept the first representation as it reduces the size
of the graph and shouldn't cause many problems.
*/ */
pub fn build_from_paths( pub fn build_from_paths(
paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>, paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>,

View File

@ -1,8 +1,11 @@
#![allow(clippy::too_many_arguments)] #![allow(clippy::too_many_arguments)]
use std::collections::{BTreeSet, VecDeque}; use std::collections::{BTreeSet, VecDeque};
use std::iter::FromIterator;
use std::ops::ControlFlow; use std::ops::ControlFlow;
use fxhash::FxHashSet;
use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait}; use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{Interned, MappedInterner}; use crate::search::new::interner::{Interned, MappedInterner};
use crate::search::new::query_graph::QueryNode; use crate::search::new::query_graph::QueryNode;
@ -112,9 +115,6 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
} }
} }
} }
// if there wasn't any valid path from this node to the end node, then
// this node is a dead end **for this specific cost**.
// we could encode this in the dead-ends cache
Ok(ControlFlow::Continue(any_valid)) Ok(ControlFlow::Continue(any_valid))
} }
@ -126,11 +126,11 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
visit: VisitFn<G>, visit: VisitFn<G>,
ctx: &mut VisitorContext<G>, ctx: &mut VisitorContext<G>,
) -> Result<ControlFlow<(), bool>> { ) -> Result<ControlFlow<(), bool>> {
if ctx if !ctx
.all_costs_from_node .all_costs_from_node
.get(dest_node) .get(dest_node)
.iter() .iter()
.all(|next_cost| *next_cost != self.remaining_cost) .any(|next_cost| *next_cost == self.remaining_cost)
{ {
return Ok(ControlFlow::Continue(false)); return Ok(ControlFlow::Continue(false));
} }
@ -158,14 +158,12 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
) -> Result<ControlFlow<(), bool>> { ) -> Result<ControlFlow<(), bool>> {
assert!(dest_node != ctx.graph.query_graph.end_node); assert!(dest_node != ctx.graph.query_graph.end_node);
if self.forbidden_conditions_to_nodes.contains(dest_node) if self.forbidden_conditions.contains(condition)
|| self.forbidden_conditions_to_nodes.contains(dest_node)
|| edge_new_nodes_to_skip.intersects(&self.visited_nodes) || edge_new_nodes_to_skip.intersects(&self.visited_nodes)
{ {
return Ok(ControlFlow::Continue(false)); return Ok(ControlFlow::Continue(false));
} }
if self.forbidden_conditions.contains(condition) {
return Ok(ControlFlow::Continue(false));
}
// Checking that from the destination node, there is at least // Checking that from the destination node, there is at least
// one cost that we can visit that corresponds to our remaining budget. // one cost that we can visit that corresponds to our remaining budget.
@ -244,48 +242,41 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
costs_to_end costs_to_end
} }
pub fn update_all_costs_before_nodes( pub fn update_all_costs_before_node(
&self, &self,
removed_nodes: &BTreeSet<Interned<QueryNode>>, node_with_removed_outgoing_conditions: Interned<QueryNode>,
costs: &mut MappedInterner<QueryNode, Vec<u64>>, costs: &mut MappedInterner<QueryNode, Vec<u64>>,
) { ) {
// unsafe {
// FIND_ALL_COSTS_INC_COUNT += 1;
// println!(
// "update_all_costs_after_removing_edge incrementally count: {}",
// FIND_ALL_COSTS_INC_COUNT
// );
// }
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len()); let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
let mut node_stack = VecDeque::new(); let mut node_stack = VecDeque::new();
for node in removed_nodes.iter() { enqueued.insert(node_with_removed_outgoing_conditions);
enqueued.insert(*node); node_stack.push_back(node_with_removed_outgoing_conditions);
node_stack.push_back(*node);
}
while let Some(cur_node) = node_stack.pop_front() { 'main_loop: while let Some(cur_node) = node_stack.pop_front() {
let mut self_costs = BTreeSet::<u64>::new(); let mut costs_to_remove = FxHashSet::default();
for c in costs.get(cur_node) {
costs_to_remove.insert(*c);
}
let cur_node_edges = &self.edges_of_node.get(cur_node); let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() { for edge_idx in cur_node_edges.iter() {
let edge = self.edges_store.get(edge_idx).as_ref().unwrap(); let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
let succ_node = edge.dest_node; for cost in costs.get(edge.dest_node).iter() {
let succ_costs = costs.get(succ_node); costs_to_remove.remove(&(*cost + edge.cost as u64));
for succ_distance in succ_costs { if costs_to_remove.is_empty() {
self_costs.insert(edge.cost as u64 + succ_distance); continue 'main_loop;
} }
} }
let costs_to_end_cur_node = costs.get_mut(cur_node);
for cost in self_costs.iter() {
costs_to_end_cur_node.push(*cost);
} }
let self_costs = self_costs.into_iter().collect::<Vec<_>>(); if costs_to_remove.is_empty() {
if &self_costs == costs.get(cur_node) { continue 'main_loop;
continue;
} }
*costs.get_mut(cur_node) = self_costs; let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied());
for c in costs_to_remove {
new_costs.remove(&c);
}
*costs.get_mut(cur_node) = new_costs.into_iter().collect();
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() { for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) { if !enqueued.contains(prev_node) {