From c64585352940fae26a6c75de5750459bf53fd499 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?=
Date: Tue, 21 Feb 2023 09:48:49 +0100
Subject: [PATCH] Introduce a generic graph-based ranking rule

---
 .../search/new/graph_based_ranking_rule.rs    | 155 ++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 milli/src/search/new/graph_based_ranking_rule.rs

diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs
new file mode 100644
index 000000000..0f72b9d5d
--- /dev/null
+++ b/milli/src/search/new/graph_based_ranking_rule.rs
@@ -0,0 +1,155 @@
+use heed::RoTxn;
+use roaring::RoaringBitmap;
+
+use crate::{
+    new::ranking_rule_graph::cheapest_paths::{self, Path},
+    Index, Result,
+};
+
+use super::{
+    db_cache::DatabaseCache,
+    ranking_rule_graph::{
+        cheapest_paths::KCheapestPathsState, edge_docids_cache::EdgeDocidsCache,
+        empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, RankingRuleGraph,
+        RankingRuleGraphTrait,
+    },
+    QueryGraph, RankingRule, RankingRuleOutput,
+};
+
+/// A ranking rule driven by a [`RankingRuleGraph`] built from the query graph.
+///
+/// Each call to `next_bucket` computes the graph's paths of the next lowest
+/// cost and resolves them into one bucket of candidate documents.
+pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
+    /// State of the iteration in progress; `None` when no iteration is active.
+    state: Option<GraphBasedRankingRuleState<G>>,
+}
+
+// Implemented by hand: `#[derive(Default)]` would add a `G: Default` bound.
+impl<G: RankingRuleGraphTrait> Default for GraphBasedRankingRule<G> {
+    fn default() -> Self {
+        Self { state: None }
+    }
+}
+
+/// Everything a [`GraphBasedRankingRule`] keeps between two buckets.
+pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
+    /// Graph built from the query graph in `start_iteration`.
+    graph: RankingRuleGraph<G>,
+    /// State of the cheapest-paths computation; `None` once no paths are left.
+    cheapest_paths_state: Option<KCheapestPathsState>,
+    /// Cache handed to `resolve_paths`, kept across buckets.
+    edge_docids_cache: EdgeDocidsCache<G>,
+    /// Cache read by the path computation and passed mutably to `resolve_paths`.
+    empty_paths_cache: EmptyPathsCache,
+}
+
+impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGraph>
+    for GraphBasedRankingRule<G>
+{
+    /// Builds the ranking rule graph for `query_graph` and starts an iteration.
+    ///
+    /// On success, any state left over from a previous iteration is replaced.
+    fn start_iteration(
+        &mut self,
+        index: &Index,
+        txn: &'transaction RoTxn,
+        db_cache: &mut DatabaseCache<'transaction>,
+        _universe: &RoaringBitmap,
+        query_graph: &QueryGraph,
+    ) -> Result<()> {
+        // TODO: when a previous state exists, update its graph incrementally
+        // (based on a diff) instead of rebuilding everything from scratch.
+        let graph = RankingRuleGraph::build(index, txn, db_cache, query_graph.clone())?;
+        let cheapest_paths_state = KCheapestPathsState::new(&graph);
+
+        self.state = Some(GraphBasedRankingRuleState {
+            graph,
+            cheapest_paths_state,
+            edge_docids_cache: <_>::default(),
+            empty_paths_cache: <_>::default(),
+        });
+
+        Ok(())
+    }
+
+    /// Returns the next bucket of candidates, or `None` when no paths remain.
+    ///
+    /// The state is moved out of `self` while the bucket is computed and is
+    /// only put back when a bucket is produced: after `Ok(None)` or an error,
+    /// `start_iteration` must run again before the next `next_bucket` call.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `universe` holds fewer than two entries, or if no
+    /// iteration is in progress.
+    fn next_bucket(
+        &mut self,
+        index: &Index,
+        txn: &'transaction RoTxn,
+        db_cache: &mut DatabaseCache<'transaction>,
+        universe: &RoaringBitmap,
+    ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
+        assert!(universe.len() > 1);
+        let mut state =
+            self.state.take().expect("`next_bucket` called without an iteration in progress");
+
+        let Some(cheapest_paths_state) = state.cheapest_paths_state.take() else {
+            return Ok(None);
+        };
+
+        let mut paths = PathsMap::default();
+
+        // TODO: when does it return None? -> when there is no cheapest path
+        // How to handle it? -> ... return all document ids from the universe?
+        //
+        // TODO: Give an empty_edge and empty_prefix argument to the
+        // compute_paths_of_next_lowest_cost function
+        //
+        // It returns None if there are no longer any paths to compute,
+        // BUT! `paths` may not be empty, and we still need to compute the
+        // current bucket.
+        state.cheapest_paths_state = cheapest_paths_state.compute_paths_of_next_lowest_cost(
+            &mut state.graph,
+            &state.empty_paths_cache,
+            &mut paths,
+        );
+
+        // TODO: verify that this is correct
+        // If the paths are empty, we should probably return the universe?
+        // BUT! Is there a case where the paths are empty AND the universe is
+        // not empty?
+        if paths.is_empty() {
+            self.state = None;
+            return Ok(None);
+        }
+
+        // This call also updates the graph such that it doesn't contain the
+        // empty edges anymore.
+        let bucket = state.graph.resolve_paths(
+            index,
+            txn,
+            db_cache,
+            &mut state.edge_docids_cache,
+            &mut state.empty_paths_cache,
+            universe,
+            paths,
+        )?;
+
+        let next_query_graph = state.graph.query_graph.clone();
+
+        self.state = Some(state);
+
+        Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket }))
+    }
+
+    /// Drops the state of the iteration that just finished.
+    fn end_iteration(
+        &mut self,
+        _index: &Index,
+        _txn: &'transaction RoTxn,
+        _db_cache: &mut DatabaseCache<'transaction>,
+    ) {
+        self.state = None;
+    }
+}