mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 09:35:51 +08:00
Speed up the graph-based ranking rule when a lot of different costs exist
This commit is contained in:
parent
3b2c8b9f25
commit
30fb1153cc
@ -309,11 +309,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
Ok(ControlFlow::Continue(()))
|
Ok(ControlFlow::Continue(()))
|
||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
// if at_least_one {
|
|
||||||
// unsafe {
|
|
||||||
// println!("\n===== {id} COST: {cost} ==== PATHS: {COUNT_PATHS} ==== NODES: {COUNT_VISITED_NODES} ===== UNIVERSE: {universe}", id=self.id, universe=universe.len());
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
logger.log_internal_state(graph);
|
logger.log_internal_state(graph);
|
||||||
logger.log_internal_state(&good_paths);
|
logger.log_internal_state(&good_paths);
|
||||||
|
|
||||||
@ -337,8 +332,14 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
|
|
||||||
let next_query_graph = QueryGraph::build_from_paths(paths);
|
let next_query_graph = QueryGraph::build_from_paths(paths);
|
||||||
|
|
||||||
if !nodes_with_removed_outgoing_conditions.is_empty() {
|
#[allow(clippy::comparison_chain)]
|
||||||
graph.update_all_costs_before_nodes(&nodes_with_removed_outgoing_conditions, all_costs);
|
if nodes_with_removed_outgoing_conditions.len() == 1 {
|
||||||
|
graph.update_all_costs_before_node(
|
||||||
|
*nodes_with_removed_outgoing_conditions.first().unwrap(),
|
||||||
|
all_costs,
|
||||||
|
);
|
||||||
|
} else if nodes_with_removed_outgoing_conditions.len() > 1 {
|
||||||
|
*all_costs = graph.find_all_costs_to_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
self.state = Some(state);
|
self.state = Some(state);
|
||||||
|
@ -8,7 +8,6 @@ use crate::search::new::interner::Interner;
|
|||||||
use crate::Result;
|
use crate::Result;
|
||||||
use fxhash::{FxHashMap, FxHasher};
|
use fxhash::{FxHashMap, FxHasher};
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::collections::hash_map::Entry;
|
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::hash::{Hash, Hasher};
|
use std::hash::{Hash, Hasher};
|
||||||
|
|
||||||
@ -364,8 +363,6 @@ impl QueryGraph {
|
|||||||
└──│ b2 │──│ c2 │───│ d │───│ e2 │
|
└──│ b2 │──│ c2 │───│ d │───│ e2 │
|
||||||
└────┘ └────┘ └────┘ └────┘
|
└────┘ └────┘ └────┘ └────┘
|
||||||
```
|
```
|
||||||
But we accept the first representation as it reduces the size
|
|
||||||
of the graph and shouldn't cause many problems.
|
|
||||||
*/
|
*/
|
||||||
pub fn build_from_paths(
|
pub fn build_from_paths(
|
||||||
paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>,
|
paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>,
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
#![allow(clippy::too_many_arguments)]
|
#![allow(clippy::too_many_arguments)]
|
||||||
|
|
||||||
use std::collections::{BTreeSet, VecDeque};
|
use std::collections::{BTreeSet, VecDeque};
|
||||||
|
use std::iter::FromIterator;
|
||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
|
use fxhash::FxHashSet;
|
||||||
|
|
||||||
use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::{Interned, MappedInterner};
|
use crate::search::new::interner::{Interned, MappedInterner};
|
||||||
use crate::search::new::query_graph::QueryNode;
|
use crate::search::new::query_graph::QueryNode;
|
||||||
@ -112,9 +115,6 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if there wasn't any valid path from this node to the end node, then
|
|
||||||
// this node is a dead end **for this specific cost**.
|
|
||||||
// we could encode this in the dead-ends cache
|
|
||||||
|
|
||||||
Ok(ControlFlow::Continue(any_valid))
|
Ok(ControlFlow::Continue(any_valid))
|
||||||
}
|
}
|
||||||
@ -126,11 +126,11 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
|||||||
visit: VisitFn<G>,
|
visit: VisitFn<G>,
|
||||||
ctx: &mut VisitorContext<G>,
|
ctx: &mut VisitorContext<G>,
|
||||||
) -> Result<ControlFlow<(), bool>> {
|
) -> Result<ControlFlow<(), bool>> {
|
||||||
if ctx
|
if !ctx
|
||||||
.all_costs_from_node
|
.all_costs_from_node
|
||||||
.get(dest_node)
|
.get(dest_node)
|
||||||
.iter()
|
.iter()
|
||||||
.all(|next_cost| *next_cost != self.remaining_cost)
|
.any(|next_cost| *next_cost == self.remaining_cost)
|
||||||
{
|
{
|
||||||
return Ok(ControlFlow::Continue(false));
|
return Ok(ControlFlow::Continue(false));
|
||||||
}
|
}
|
||||||
@ -158,14 +158,12 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
|||||||
) -> Result<ControlFlow<(), bool>> {
|
) -> Result<ControlFlow<(), bool>> {
|
||||||
assert!(dest_node != ctx.graph.query_graph.end_node);
|
assert!(dest_node != ctx.graph.query_graph.end_node);
|
||||||
|
|
||||||
if self.forbidden_conditions_to_nodes.contains(dest_node)
|
if self.forbidden_conditions.contains(condition)
|
||||||
|
|| self.forbidden_conditions_to_nodes.contains(dest_node)
|
||||||
|| edge_new_nodes_to_skip.intersects(&self.visited_nodes)
|
|| edge_new_nodes_to_skip.intersects(&self.visited_nodes)
|
||||||
{
|
{
|
||||||
return Ok(ControlFlow::Continue(false));
|
return Ok(ControlFlow::Continue(false));
|
||||||
}
|
}
|
||||||
if self.forbidden_conditions.contains(condition) {
|
|
||||||
return Ok(ControlFlow::Continue(false));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Checking that from the destination node, there is at least
|
// Checking that from the destination node, there is at least
|
||||||
// one cost that we can visit that corresponds to our remaining budget.
|
// one cost that we can visit that corresponds to our remaining budget.
|
||||||
@ -244,48 +242,41 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
costs_to_end
|
costs_to_end
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn update_all_costs_before_nodes(
|
pub fn update_all_costs_before_node(
|
||||||
&self,
|
&self,
|
||||||
removed_nodes: &BTreeSet<Interned<QueryNode>>,
|
node_with_removed_outgoing_conditions: Interned<QueryNode>,
|
||||||
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
|
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
|
||||||
) {
|
) {
|
||||||
// unsafe {
|
|
||||||
// FIND_ALL_COSTS_INC_COUNT += 1;
|
|
||||||
// println!(
|
|
||||||
// "update_all_costs_after_removing_edge incrementally count: {}",
|
|
||||||
// FIND_ALL_COSTS_INC_COUNT
|
|
||||||
// );
|
|
||||||
// }
|
|
||||||
|
|
||||||
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
|
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
|
||||||
let mut node_stack = VecDeque::new();
|
let mut node_stack = VecDeque::new();
|
||||||
|
|
||||||
for node in removed_nodes.iter() {
|
enqueued.insert(node_with_removed_outgoing_conditions);
|
||||||
enqueued.insert(*node);
|
node_stack.push_back(node_with_removed_outgoing_conditions);
|
||||||
node_stack.push_back(*node);
|
|
||||||
}
|
|
||||||
|
|
||||||
while let Some(cur_node) = node_stack.pop_front() {
|
'main_loop: while let Some(cur_node) = node_stack.pop_front() {
|
||||||
let mut self_costs = BTreeSet::<u64>::new();
|
let mut costs_to_remove = FxHashSet::default();
|
||||||
|
for c in costs.get(cur_node) {
|
||||||
|
costs_to_remove.insert(*c);
|
||||||
|
}
|
||||||
|
|
||||||
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||||
for edge_idx in cur_node_edges.iter() {
|
for edge_idx in cur_node_edges.iter() {
|
||||||
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||||
let succ_node = edge.dest_node;
|
for cost in costs.get(edge.dest_node).iter() {
|
||||||
let succ_costs = costs.get(succ_node);
|
costs_to_remove.remove(&(*cost + edge.cost as u64));
|
||||||
for succ_distance in succ_costs {
|
if costs_to_remove.is_empty() {
|
||||||
self_costs.insert(edge.cost as u64 + succ_distance);
|
continue 'main_loop;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let costs_to_end_cur_node = costs.get_mut(cur_node);
|
|
||||||
for cost in self_costs.iter() {
|
|
||||||
costs_to_end_cur_node.push(*cost);
|
|
||||||
}
|
}
|
||||||
let self_costs = self_costs.into_iter().collect::<Vec<_>>();
|
if costs_to_remove.is_empty() {
|
||||||
if &self_costs == costs.get(cur_node) {
|
continue 'main_loop;
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
*costs.get_mut(cur_node) = self_costs;
|
let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied());
|
||||||
|
for c in costs_to_remove {
|
||||||
|
new_costs.remove(&c);
|
||||||
|
}
|
||||||
|
*costs.get_mut(cur_node) = new_costs.into_iter().collect();
|
||||||
|
|
||||||
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
||||||
if !enqueued.contains(prev_node) {
|
if !enqueued.contains(prev_node) {
|
||||||
|
Loading…
Reference in New Issue
Block a user