diff --git a/milli/examples/search.rs b/milli/examples/search.rs index ff7d564c6..c9a3c1438 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -1,6 +1,6 @@ -use std::error::Error; use std::io::stdin; use std::time::Instant; +use std::{error::Error, path::Path}; use heed::EnvOpenOptions; use milli::{ @@ -19,7 +19,7 @@ fn main() -> Result<(), Box> { program_name ) }); - let detailed_logger = args.next(); + let detailed_logger_dir = args.next(); let print_documents: bool = if let Some(arg) = args.next() { arg == "print-documents" } else { false }; @@ -34,11 +34,11 @@ fn main() -> Result<(), Box> { let mut default_logger = DefaultSearchLogger; // FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible. // Workaround'd here by recreating the logger on each iteration of the loop - let mut detailed_logger = detailed_logger + let mut detailed_logger = detailed_logger_dir .as_ref() - .map(|logger_dir| milli::DetailedSearchLogger::new(logger_dir)); + .map(|logger_dir| (milli::VisualSearchLogger::default(), logger_dir)); let logger: &mut dyn SearchLogger<_> = - if let Some(detailed_logger) = detailed_logger.as_mut() { + if let Some((detailed_logger, _)) = detailed_logger.as_mut() { detailed_logger } else { &mut default_logger @@ -61,8 +61,8 @@ fn main() -> Result<(), Box> { &mut DefaultSearchLogger, logger, )?; - if let Some(logger) = &detailed_logger { - logger.write_d2_description(&mut ctx); + if let Some((logger, dir)) = detailed_logger { + logger.finish(&mut ctx, Path::new(dir))?; } let elapsed = start.elapsed(); println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids); diff --git a/milli/src/lib.rs b/milli/src/lib.rs index e134c8ceb..868df74e8 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -79,7 +79,7 @@ pub use filter_parser::{Condition, FilterCondition, Span, Token}; use fxhash::{FxHasher32, FxHasher64}; pub use grenad::CompressionType; pub use search::new::{ - execute_search, DefaultSearchLogger, DetailedSearchLogger, SearchContext, SearchLogger, + execute_search, DefaultSearchLogger, SearchContext, SearchLogger, VisualSearchLogger, }; use serde_json::Value; pub use {charabia as tokenizer, heed}; diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index 154bcd3b2..41a96dd9e 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -270,6 +270,9 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase } })?; + logger.log_internal_state(graph); + logger.log_internal_state(&good_paths); + // We modify the next query graph so that it only contains the subgraph // that was used to compute this bucket // But we only do it in case the bucket length is >1, because otherwise diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs deleted file mode 100644 index 0efc457f6..000000000 --- a/milli/src/search/new/logger/detailed.rs +++ /dev/null @@ -1,563 +0,0 @@ -use std::any::Any; -use std::fs::File; -use std::io::Write; -use std::path::PathBuf; -use std::time::Instant; - -// use rand::random; -use roaring::RoaringBitmap; - -use crate::search::new::graph_based_ranking_rule::Typo; -use crate::search::new::interner::{Interned, MappedInterner}; -use crate::search::new::query_graph::QueryNodeData; -use crate::search::new::query_term::LocatedQueryTermSubset; -use crate::search::new::ranking_rule_graph::{ - DeadEndsCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph, - RankingRuleGraphTrait, TypoCondition, TypoGraph, -}; -use crate::search::new::ranking_rules::BoxRankingRule; -use crate::search::new::sort::Sort; -use crate::search::new::words::Words; -use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger}; - -pub enum SearchEvents { - RankingRuleStartIteration { - ranking_rule_idx: usize, - query: QueryGraph, - universe: RoaringBitmap, - time: Instant, - }, - RankingRuleNextBucket { - ranking_rule_idx: usize, - universe: RoaringBitmap, - candidates: RoaringBitmap, - time: Instant, - }, - RankingRuleEndIteration { - ranking_rule_idx: usize, - universe: RoaringBitmap, - time: Instant, - }, - ExtendResults { - new: Vec, - }, - WordsState { - query_graph: QueryGraph, - }, - ProximityState { - graph: RankingRuleGraph, - paths: Vec>>, - dead_ends_cache: DeadEndsCache, - universe: RoaringBitmap, - costs: MappedInterner>, - cost: u64, - }, - TypoState { - graph: RankingRuleGraph, - paths: Vec>>, - dead_ends_cache: DeadEndsCache, - universe: RoaringBitmap, - costs: MappedInterner>, - cost: u64, - }, - RankingRuleSkipBucket { - ranking_rule_idx: usize, - candidates: RoaringBitmap, - time: Instant, - }, -} - -pub struct DetailedSearchLogger { - folder_path: PathBuf, - initial_query: Option, - initial_query_time: Option, - query_for_universe: Option, - initial_universe: Option, - ranking_rules_ids: Option>, - events: Vec, -} -impl DetailedSearchLogger { - pub fn new(folder_path: &str) -> Self { - Self { - folder_path: PathBuf::new().join(folder_path), - initial_query: None, - initial_query_time: None, - query_for_universe: None, - initial_universe: None, - ranking_rules_ids: None, - events: vec![], - } - } -} - -impl SearchLogger for DetailedSearchLogger { - fn initial_query(&mut self, query: &QueryGraph) { - self.initial_query = Some(query.clone()); - self.initial_query_time = Some(Instant::now()); - } - - fn query_for_initial_universe(&mut self, query: &QueryGraph) { - self.query_for_universe = Some(query.clone()); - } - - fn initial_universe(&mut self, universe: &RoaringBitmap) { - self.initial_universe = Some(universe.clone()); - } - fn ranking_rules(&mut self, rr: &[BoxRankingRule]) { - self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect()); - } - - fn start_iteration_ranking_rule( - &mut self, - ranking_rule_idx: usize, - _ranking_rule: &dyn RankingRule, - query: &QueryGraph, - universe: &RoaringBitmap, - ) { - self.events.push(SearchEvents::RankingRuleStartIteration { - ranking_rule_idx, - query: query.clone(), - universe: universe.clone(), - time: Instant::now(), - }) - } - - fn next_bucket_ranking_rule( - &mut self, - ranking_rule_idx: usize, - _ranking_rule: &dyn RankingRule, - universe: &RoaringBitmap, - candidates: &RoaringBitmap, - ) { - self.events.push(SearchEvents::RankingRuleNextBucket { - ranking_rule_idx, - universe: universe.clone(), - candidates: candidates.clone(), - time: Instant::now(), - }) - } - fn skip_bucket_ranking_rule( - &mut self, - ranking_rule_idx: usize, - _ranking_rule: &dyn RankingRule, - candidates: &RoaringBitmap, - ) { - self.events.push(SearchEvents::RankingRuleSkipBucket { - ranking_rule_idx, - candidates: candidates.clone(), - time: Instant::now(), - }) - } - - fn end_iteration_ranking_rule( - &mut self, - ranking_rule_idx: usize, - _ranking_rule: &dyn RankingRule, - universe: &RoaringBitmap, - ) { - self.events.push(SearchEvents::RankingRuleEndIteration { - ranking_rule_idx, - universe: universe.clone(), - time: Instant::now(), - }) - } - fn add_to_results(&mut self, docids: &[u32]) { - self.events.push(SearchEvents::ExtendResults { new: docids.to_vec() }); - } - - /// Logs the internal state of the ranking rule - fn log_ranking_rule_state<'ctx>(&mut self, state: &(dyn Any + 'ctx)) { - if let Some(_words) = state.downcast_ref::() { - } else if let Some(_sort) = state.downcast_ref::>() { - } else if let Some(_typo) = state.downcast_ref::() { - } - } -} - -impl DetailedSearchLogger { - pub fn write_d2_description(&self, ctx: &mut SearchContext) { - let mut prev_time = self.initial_query_time.unwrap(); - let mut timestamp = vec![]; - fn activated_id(timestamp: &[usize]) -> String { - let mut s = String::new(); - s.push('0'); - for t in timestamp.iter() { - s.push_str(&format!("{t}")); - } - s - } - - let index_path = self.folder_path.join("index.d2"); - let mut file = std::fs::File::create(index_path).unwrap(); - writeln!(&mut file, "direction: right").unwrap(); - writeln!(&mut file, "Initial Query Graph: {{").unwrap(); - let initial_query_graph = self.initial_query.as_ref().unwrap(); - Self::query_graph_d2_description(ctx, initial_query_graph, &mut file); - writeln!(&mut file, "}}").unwrap(); - - writeln!(&mut file, "Query Graph Used To Compute Universe: {{").unwrap(); - let query_graph_for_universe = self.query_for_universe.as_ref().unwrap(); - Self::query_graph_d2_description(ctx, query_graph_for_universe, &mut file); - writeln!(&mut file, "}}").unwrap(); - - let initial_universe = self.initial_universe.as_ref().unwrap(); - writeln!(&mut file, "Initial Universe Length {}", initial_universe.len()).unwrap(); - - writeln!(&mut file, "Control Flow Between Ranking Rules: {{").unwrap(); - writeln!(&mut file, "shape: sequence_diagram").unwrap(); - for (idx, rr_id) in self.ranking_rules_ids.as_ref().unwrap().iter().enumerate() { - writeln!(&mut file, "{idx}: {rr_id}").unwrap(); - } - writeln!(&mut file, "results").unwrap(); - // writeln!(&mut file, "time").unwrap(); - for event in self.events.iter() { - match event { - SearchEvents::RankingRuleStartIteration { ranking_rule_idx, time, .. } => { - let _elapsed = time.duration_since(prev_time); - prev_time = *time; - let parent_activated_id = activated_id(×tamp); - timestamp.push(0); - let self_activated_id = activated_id(×tamp); - // writeln!(&mut file, "time.{self_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap(); - if *ranking_rule_idx != 0 { - let parent_ranking_rule_idx = ranking_rule_idx - 1; - writeln!( - &mut file, - "{parent_ranking_rule_idx}.{parent_activated_id} -> {ranking_rule_idx}.{self_activated_id} : start iteration", - ) - .unwrap(); - } - writeln!( - &mut file, - "{ranking_rule_idx}.{self_activated_id} {{ - style {{ - fill: \"#D8A7B1\" - }} -}}" - ) - .unwrap(); - } - SearchEvents::RankingRuleNextBucket { - ranking_rule_idx, - time, - universe, - candidates, - } => { - let _elapsed = time.duration_since(prev_time); - prev_time = *time; - let old_activated_id = activated_id(×tamp); - // writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap(); - *timestamp.last_mut().unwrap() += 1; - let next_activated_id = activated_id(×tamp); - writeln!(&mut file, - "{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : next bucket {}/{}", candidates.len(), universe.len()) - .unwrap(); - } - SearchEvents::RankingRuleSkipBucket { ranking_rule_idx, candidates, time } => { - let _elapsed = time.duration_since(prev_time); - prev_time = *time; - let old_activated_id = activated_id(×tamp); - // writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap(); - *timestamp.last_mut().unwrap() += 1; - let next_activated_id = activated_id(×tamp); - let len = candidates.len(); - writeln!(&mut file, - "{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : skip bucket ({len})",) - .unwrap(); - } - SearchEvents::RankingRuleEndIteration { ranking_rule_idx, time, .. } => { - let _elapsed = time.duration_since(prev_time); - prev_time = *time; - let cur_activated_id = activated_id(×tamp); - // writeln!(&mut file, "time.{cur_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap(); - - timestamp.pop(); - let parent_activated_id = activated_id(×tamp); - let parent_ranking_rule = if *ranking_rule_idx == 0 { - "start".to_owned() - } else { - format!("{}.{parent_activated_id}", ranking_rule_idx - 1) - }; - writeln!( - &mut file, - "{ranking_rule_idx}.{cur_activated_id} -> {parent_ranking_rule} : end iteration", - ) - .unwrap(); - } - SearchEvents::ExtendResults { new } => { - if new.is_empty() { - continue; - } - let cur_ranking_rule = timestamp.len() - 1; - let cur_activated_id = activated_id(×tamp); - let docids = new.iter().collect::>(); - let len = new.len(); - - writeln!( - &mut file, - "{cur_ranking_rule}.{cur_activated_id} -> results.{cur_ranking_rule}{cur_activated_id} : \"add {len}\" -results.{cur_ranking_rule}{cur_activated_id} {{ - tooltip: \"{docids:?}\" - style {{ - fill: \"#B6E2D3\" - }} -}} -" - ) - .unwrap(); - } - SearchEvents::WordsState { query_graph } => { - let cur_ranking_rule = timestamp.len() - 1; - *timestamp.last_mut().unwrap() += 1; - let cur_activated_id = activated_id(×tamp); - *timestamp.last_mut().unwrap() -= 1; - let id = format!("{cur_ranking_rule}.{cur_activated_id}"); - let new_file_path = self.folder_path.join(format!("{id}.d2")); - let mut new_file = std::fs::File::create(new_file_path).unwrap(); - Self::query_graph_d2_description(ctx, query_graph, &mut new_file); - writeln!( - &mut file, - "{id} {{ - link: \"{id}.d2.svg\" -}}" - ) - .unwrap(); - } - SearchEvents::ProximityState { - graph, - paths, - dead_ends_cache, - universe, - costs, - cost, - } => { - let cur_ranking_rule = timestamp.len() - 1; - *timestamp.last_mut().unwrap() += 1; - let cur_activated_id = activated_id(×tamp); - *timestamp.last_mut().unwrap() -= 1; - let id = format!("{cur_ranking_rule}.{cur_activated_id}"); - let new_file_path = self.folder_path.join(format!("{id}.d2")); - let mut new_file = std::fs::File::create(new_file_path).unwrap(); - Self::ranking_rule_graph_d2_description( - ctx, - graph, - paths, - dead_ends_cache, - costs.clone(), - &mut new_file, - ); - writeln!( - &mut file, - "{id} {{ - link: \"{id}.d2.svg\" - tooltip: \"cost {cost}, universe len: {}\" - }}", - universe.len() - ) - .unwrap(); - } - SearchEvents::TypoState { - graph, - paths, - dead_ends_cache, - universe, - costs, - cost, - } => { - let cur_ranking_rule = timestamp.len() - 1; - *timestamp.last_mut().unwrap() += 1; - let cur_activated_id = activated_id(×tamp); - *timestamp.last_mut().unwrap() -= 1; - let id = format!("{cur_ranking_rule}.{cur_activated_id}"); - let new_file_path = self.folder_path.join(format!("{id}.d2")); - let mut new_file = std::fs::File::create(new_file_path).unwrap(); - Self::ranking_rule_graph_d2_description( - ctx, - graph, - paths, - dead_ends_cache, - costs.clone(), - &mut new_file, - ); - writeln!( - &mut file, - "{id} {{ - link: \"{id}.d2.svg\" - tooltip: \"cost {cost}, universe len: {}\" -}}", - universe.len() - ) - .unwrap(); - } - } - } - writeln!(&mut file, "}}").unwrap(); - } - - fn query_node_d2_desc( - ctx: &mut SearchContext, - node_idx: Interned, - node: &QueryNode, - _costs: &[u64], - file: &mut File, - ) { - match &node.data { - QueryNodeData::Term(LocatedQueryTermSubset { - term_subset, - positions: _, - term_ids: _, - }) => { - writeln!( - file, - "{node_idx} : \"{}\" {{ - shape: class - max_nbr_typo: {}", - term_subset.description(ctx), - term_subset.max_nbr_typos(ctx) - ) - .unwrap(); - - for w in term_subset.all_single_words_except_prefix_db(ctx).unwrap() { - let w = ctx.word_interner.get(w); - writeln!(file, "{w}: word").unwrap(); - } - for p in term_subset.all_phrases(ctx).unwrap() { - writeln!(file, "{}: phrase", p.description(ctx)).unwrap(); - } - if let Some(w) = term_subset.use_prefix_db(ctx) { - let w = ctx.word_interner.get(w); - writeln!(file, "{w}: prefix db").unwrap(); - } - - writeln!(file, "}}").unwrap(); - } - QueryNodeData::Deleted => panic!(), - QueryNodeData::Start => { - writeln!(file, "{node_idx} : START").unwrap(); - } - QueryNodeData::End => { - writeln!(file, "{node_idx} : END").unwrap(); - } - } - } - fn query_graph_d2_description( - ctx: &mut SearchContext, - query_graph: &QueryGraph, - file: &mut File, - ) { - writeln!(file, "direction: right").unwrap(); - for (node_id, node) in query_graph.nodes.iter() { - if matches!(node.data, QueryNodeData::Deleted) { - continue; - } - Self::query_node_d2_desc(ctx, node_id, node, &[], file); - - for edge in node.successors.iter() { - writeln!(file, "{node_id} -> {edge};\n").unwrap(); - } - } - } - fn ranking_rule_graph_d2_description( - ctx: &mut SearchContext, - graph: &RankingRuleGraph, - paths: &[Vec>], - _dead_ends_cache: &DeadEndsCache, - costs: MappedInterner>, - file: &mut File, - ) { - writeln!(file, "direction: right").unwrap(); - - writeln!(file, "Proximity Graph {{").unwrap(); - for (node_idx, node) in graph.query_graph.nodes.iter() { - if matches!(&node.data, QueryNodeData::Deleted) { - continue; - } - let costs = &costs.get(node_idx); - Self::query_node_d2_desc(ctx, node_idx, node, costs, file); - } - for (_edge_id, edge) in graph.edges_store.iter() { - let Some(edge) = edge else { continue }; - let Edge { source_node, dest_node, condition: details, cost, nodes_to_skip: _ } = edge; - - match &details { - None => { - writeln!(file, "{source_node} -> {dest_node} : \"always cost {cost}\"",) - .unwrap(); - } - Some(condition) => { - // let condition = graph.conditions_interner.get(*condition); - writeln!( - file, - "{source_node} -> {dest_node} : \"{condition} cost {cost}\"", - cost = edge.cost, - ) - .unwrap(); - } - } - } - writeln!(file, "}}").unwrap(); - - // writeln!(file, "costs {{").unwrap(); - // Self::paths_d2_description(graph, paths, file); - // writeln!(file, "}}").unwrap(); - - writeln!(file, "Paths {{").unwrap(); - Self::paths_d2_description(ctx, graph, paths, file); - writeln!(file, "}}").unwrap(); - - // writeln!(file, "Dead-end couples of conditions {{").unwrap(); - // for (i, (e1, e2)) in dead_end_paths_cache.condition_couples.iter().enumerate() { - // writeln!(file, "{i} : \"\" {{").unwrap(); - // Self::condition_d2_description(ctx, graph, e1, file); - // for e2 in e2.iter() { - // Self::condition_d2_description(ctx, graph, e2, file); - // writeln!(file, "{e1} -- {e2}").unwrap(); - // } - // writeln!(file, "}}").unwrap(); - // } - // writeln!(file, "}}").unwrap(); - - // writeln!(file, "Dead-end edges {{").unwrap(); - // for condition in dead_end_paths_cache.conditions.iter() { - // writeln!(file, "{condition}").unwrap(); - // } - // writeln!(file, "}}").unwrap(); - - // writeln!(file, "Dead-end prefixes {{").unwrap(); - // writeln!(file, "}}").unwrap(); - } - fn condition_d2_description( - ctx: &mut SearchContext, - graph: &RankingRuleGraph, - condition_id: Interned, - file: &mut File, - ) { - let condition = graph.conditions_interner.get(condition_id); - writeln!( - file, - "{condition_id} {{ -shape: class -label -}}", - ) - .unwrap(); - } - fn paths_d2_description( - ctx: &mut SearchContext, - graph: &RankingRuleGraph, - paths: &[Vec>], - file: &mut File, - ) { - for (path_idx, condition_indexes) in paths.iter().enumerate() { - writeln!(file, "{path_idx} {{").unwrap(); - for condition in condition_indexes.iter() { - Self::condition_d2_description(ctx, graph, *condition, file); - } - for couple_edges in condition_indexes.windows(2) { - let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() }; - writeln!(file, "{src_edge_idx} -> {dest_edge_idx}").unwrap(); - } - writeln!(file, "}}").unwrap(); - } - } -} diff --git a/milli/src/search/new/logger/mod.rs b/milli/src/search/new/logger/mod.rs index 1ef048a8e..d516a5db9 100644 --- a/milli/src/search/new/logger/mod.rs +++ b/milli/src/search/new/logger/mod.rs @@ -1,84 +1,28 @@ // #[cfg(test)] -pub mod detailed; +pub mod visual; use std::any::Any; use roaring::RoaringBitmap; -use super::graph_based_ranking_rule::Typo; use super::ranking_rules::BoxRankingRule; -use super::sort::Sort; -use super::words::Words; use super::{RankingRule, RankingRuleQueryTrait}; /// Trait for structure logging the execution of a search query. pub trait SearchLogger { /// Logs the initial query - fn initial_query(&mut self, query: &Q); + fn initial_query(&mut self, _query: &Q); /// Logs the value of the initial set of all candidates - fn initial_universe(&mut self, universe: &RoaringBitmap); + fn initial_universe(&mut self, _universe: &RoaringBitmap); /// Logs the query that was used to compute the set of all candidates - fn query_for_initial_universe(&mut self, query: &Q); + fn query_for_initial_universe(&mut self, _query: &Q); /// Logs the ranking rules used to perform the search query - fn ranking_rules(&mut self, rr: &[BoxRankingRule]); + fn ranking_rules(&mut self, _rr: &[BoxRankingRule]); /// Logs the start of a ranking rule's iteration. - fn start_iteration_ranking_rule( - &mut self, - ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule, - query: &Q, - universe: &RoaringBitmap, - ); - /// Logs the end of the computation of a ranking rule bucket - fn next_bucket_ranking_rule( - &mut self, - ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule, - universe: &RoaringBitmap, - candidates: &RoaringBitmap, - ); - /// Logs the skipping of a ranking rule bucket - fn skip_bucket_ranking_rule( - &mut self, - ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule, - candidates: &RoaringBitmap, - ); - /// Logs the end of a ranking rule's iteration. - fn end_iteration_ranking_rule( - &mut self, - ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule, - universe: &RoaringBitmap, - ); - /// Logs the addition of document ids to the final results - fn add_to_results(&mut self, docids: &[u32]); - - /// Logs the internal state of the ranking rule - fn log_ranking_rule_state<'ctx>(&mut self, rr: &(dyn Any + 'ctx)) { - if let Some(_words) = rr.downcast_ref::() { - } else if let Some(_sort) = rr.downcast_ref::>() { - } else if let Some(_typo) = rr.downcast_ref::() { - } - } -} - -/// A dummy [`SearchLogger`] which does nothing. -pub struct DefaultSearchLogger; - -impl SearchLogger for DefaultSearchLogger { - fn initial_query(&mut self, _query: &Q) {} - - fn query_for_initial_universe(&mut self, _query: &Q) {} - - fn initial_universe(&mut self, _universe: &RoaringBitmap) {} - - fn ranking_rules(&mut self, _rr: &[BoxRankingRule]) {} - fn start_iteration_ranking_rule( &mut self, _ranking_rule_idx: usize, @@ -87,7 +31,7 @@ impl SearchLogger for DefaultSearchLogger { _universe: &RoaringBitmap, ) { } - + /// Logs the end of the computation of a ranking rule bucket fn next_bucket_ranking_rule( &mut self, _ranking_rule_idx: usize, @@ -96,6 +40,7 @@ impl SearchLogger for DefaultSearchLogger { _candidates: &RoaringBitmap, ) { } + /// Logs the skipping of a ranking rule bucket fn skip_bucket_ranking_rule( &mut self, _ranking_rule_idx: usize, @@ -103,7 +48,7 @@ impl SearchLogger for DefaultSearchLogger { _candidates: &RoaringBitmap, ) { } - + /// Logs the end of a ranking rule's iteration. fn end_iteration_ranking_rule( &mut self, _ranking_rule_idx: usize, @@ -111,6 +56,26 @@ impl SearchLogger for DefaultSearchLogger { _universe: &RoaringBitmap, ) { } + /// Logs the addition of document ids to the final results + fn add_to_results(&mut self, _docids: &[u32]); + + /// Logs an internal state in the search algorithms + fn log_internal_state(&mut self, _rr: &dyn Any); +} + +/// A dummy [`SearchLogger`] which does nothing. +pub struct DefaultSearchLogger; + +impl SearchLogger for DefaultSearchLogger { + fn initial_query(&mut self, _query: &Q) {} + + fn initial_universe(&mut self, _universe: &RoaringBitmap) {} + + fn query_for_initial_universe(&mut self, _query: &Q) {} + + fn ranking_rules(&mut self, _rr: &[BoxRankingRule]) {} fn add_to_results(&mut self, _docids: &[u32]) {} + + fn log_internal_state(&mut self, _rr: &dyn Any) {} } diff --git a/milli/src/search/new/logger/visual.rs b/milli/src/search/new/logger/visual.rs new file mode 100644 index 000000000..17f7ef76c --- /dev/null +++ b/milli/src/search/new/logger/visual.rs @@ -0,0 +1,525 @@ +use std::any::Any; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::{Path, PathBuf}; +use std::time::Instant; + +// use rand::random; +use roaring::RoaringBitmap; + +use crate::search::new::interner::Interned; +use crate::search::new::query_graph::QueryNodeData; +use crate::search::new::query_term::LocatedQueryTermSubset; +use crate::search::new::ranking_rule_graph::{ + Edge, ProximityCondition, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, + TypoCondition, TypoGraph, +}; +use crate::search::new::ranking_rules::BoxRankingRule; +use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger}; +use crate::Result; + +pub enum SearchEvents { + RankingRuleStartIteration { ranking_rule_idx: usize, universe_len: u64 }, + RankingRuleNextBucket { ranking_rule_idx: usize, universe_len: u64, bucket_len: u64 }, + RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 }, + RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 }, + ExtendResults { new: Vec }, + WordsGraph { query_graph: QueryGraph }, + ProximityGraph { graph: RankingRuleGraph }, + ProximityPaths { paths: Vec>> }, + TypoGraph { graph: RankingRuleGraph }, + TypoPaths { paths: Vec>> }, +} + +enum Location { + Words, + Typo, + Proximity, + Other, +} + +#[derive(Default)] +pub struct VisualSearchLogger { + initial_query: Option, + initial_query_time: Option, + query_for_universe: Option, + initial_universe: Option, + ranking_rules_ids: Option>, + events: Vec, + location: Vec, +} + +impl SearchLogger for VisualSearchLogger { + fn initial_query(&mut self, query: &QueryGraph) { + self.initial_query = Some(query.clone()); + self.initial_query_time = Some(Instant::now()); + } + + fn query_for_initial_universe(&mut self, query: &QueryGraph) { + self.query_for_universe = Some(query.clone()); + } + + fn initial_universe(&mut self, universe: &RoaringBitmap) { + self.initial_universe = Some(universe.clone()); + } + fn ranking_rules(&mut self, rr: &[BoxRankingRule]) { + self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect()); + } + + fn start_iteration_ranking_rule( + &mut self, + ranking_rule_idx: usize, + ranking_rule: &dyn RankingRule, + _query: &QueryGraph, + universe: &RoaringBitmap, + ) { + self.events.push(SearchEvents::RankingRuleStartIteration { + ranking_rule_idx, + universe_len: universe.len(), + }); + self.location.push(match ranking_rule.id().as_str() { + "words" => Location::Words, + "typo" => Location::Typo, + "proximity" => Location::Proximity, + _ => Location::Other, + }); + } + + fn next_bucket_ranking_rule( + &mut self, + ranking_rule_idx: usize, + _ranking_rule: &dyn RankingRule, + universe: &RoaringBitmap, + bucket: &RoaringBitmap, + ) { + self.events.push(SearchEvents::RankingRuleNextBucket { + ranking_rule_idx, + universe_len: universe.len(), + bucket_len: bucket.len(), + }); + } + fn skip_bucket_ranking_rule( + &mut self, + ranking_rule_idx: usize, + _ranking_rule: &dyn RankingRule, + bucket: &RoaringBitmap, + ) { + self.events.push(SearchEvents::RankingRuleSkipBucket { + ranking_rule_idx, + bucket_len: bucket.len(), + }) + } + + fn end_iteration_ranking_rule( + &mut self, + ranking_rule_idx: usize, + _ranking_rule: &dyn RankingRule, + universe: &RoaringBitmap, + ) { + self.events.push(SearchEvents::RankingRuleEndIteration { + ranking_rule_idx, + universe_len: universe.len(), + }); + self.location.pop(); + } + fn add_to_results(&mut self, docids: &[u32]) { + self.events.push(SearchEvents::ExtendResults { new: docids.to_vec() }); + } + + /// Logs the internal state of the ranking rule + fn log_internal_state(&mut self, state: &dyn Any) { + let Some(location) = self.location.last() else { return }; + match location { + Location::Words => { + if let Some(query_graph) = state.downcast_ref::() { + self.events.push(SearchEvents::WordsGraph { query_graph: query_graph.clone() }); + } + } + Location::Typo => { + if let Some(graph) = state.downcast_ref::>() { + self.events.push(SearchEvents::TypoGraph { graph: graph.clone() }); + } + if let Some(paths) = state.downcast_ref::>>>() { + self.events.push(SearchEvents::TypoPaths { paths: paths.clone() }); + } + } + Location::Proximity => { + if let Some(graph) = state.downcast_ref::>() { + self.events.push(SearchEvents::ProximityGraph { graph: graph.clone() }); + } + if let Some(paths) = state.downcast_ref::>>>() + { + self.events.push(SearchEvents::ProximityPaths { paths: paths.clone() }); + } + } + Location::Other => {} + } + } +} + +impl VisualSearchLogger { + pub fn finish<'ctx>(self, ctx: &'ctx mut SearchContext<'ctx>, folder: &Path) -> Result<()> { + let mut f = DetailedLoggerFinish::new(ctx, folder)?; + f.finish(self)?; + Ok(()) + } +} + +struct DetailedLoggerFinish<'ctx> { + ctx: &'ctx mut SearchContext<'ctx>, + /// The folder where all the files should be printed + folder_path: PathBuf, + /// The main file visualising the search request + index_file: BufWriter, + /// A vector of counters where each counter at index i represents the number of times + /// that the ranking rule at idx i-1 was called since its last call to `start_iteration`. + /// This is used to uniquely identify a point in the sequence diagram. + rr_action_counter: Vec, + /// The file storing information about the internal state of the latest active ranking rule + file_for_internal_state: Option>, +} + +impl<'ctx> DetailedLoggerFinish<'ctx> { + fn cur_file(&mut self) -> &mut BufWriter { + if let Some(file) = self.file_for_internal_state.as_mut() { + file + } else { + &mut self.index_file + } + } + fn pop_rr_action(&mut self) { + self.file_for_internal_state = None; + self.rr_action_counter.pop(); + } + fn push_new_rr_action(&mut self) { + self.file_for_internal_state = None; + self.rr_action_counter.push(0); + } + fn increment_cur_rr_action(&mut self) { + self.file_for_internal_state = None; + if let Some(c) = self.rr_action_counter.last_mut() { + *c += 1; + } + } + fn id_of_timestamp(&self) -> String { + let mut s = String::new(); + for t in self.rr_action_counter.iter() { + s.push_str(&format!("{t}_")); + } + s + } + fn id_of_extend_results(&self) -> String { + let mut s = String::new(); + s.push_str("results.\""); + s.push_str(&self.id_of_timestamp()); + s.push('"'); + s + } + fn id_of_last_rr_action(&self) -> String { + let mut s = String::new(); + let rr_id = if self.rr_action_counter.is_empty() { + "start.\"".to_owned() + } else { + format!("{}.\"", self.rr_action_counter.len() - 1) + }; + s.push_str(&rr_id); + s.push_str(&self.id_of_timestamp()); + s.push('"'); + s + } + fn make_new_file_for_internal_state_if_needed(&mut self) -> Result<()> { + if self.file_for_internal_state.is_some() { + return Ok(()); + } + let timestamp = self.id_of_timestamp(); + let id = self.id_of_last_rr_action(); + let new_file_path = self.folder_path.join(format!("{timestamp}.d2")); + self.file_for_internal_state = Some(BufWriter::new(File::create(new_file_path)?)); + + writeln!( + &mut self.index_file, + "{id} {{ + link: \"{timestamp}.d2.svg\" +}}" + )?; + Ok(()) + } + fn new(ctx: &'ctx mut SearchContext<'ctx>, folder_path: &Path) -> Result { + let index_path = folder_path.join("index.d2"); + let index_file = BufWriter::new(File::create(index_path)?); + + Ok(Self { + ctx, + folder_path: folder_path.to_owned(), + index_file, + rr_action_counter: vec![], + file_for_internal_state: None, + }) + } + + fn finish(&mut self, logger: VisualSearchLogger) -> Result<()> { + writeln!(&mut self.index_file, "direction: right")?; + if let Some(qg) = logger.initial_query { + writeln!(&mut self.index_file, "Initial Query Graph: {{")?; + self.write_query_graph(&qg)?; + writeln!(&mut self.index_file, "}}")?; + } + if let Some(qg) = logger.query_for_universe { + writeln!(&mut self.index_file, "Query Graph Used To Compute Universe: {{")?; + self.write_query_graph(&qg)?; + writeln!(&mut self.index_file, "}}")?; + } + let Some(ranking_rules_ids) = logger.ranking_rules_ids else { return Ok(()) }; + writeln!(&mut self.index_file, "Control Flow Between Ranking Rules: {{")?; + writeln!(&mut self.index_file, "shape: sequence_diagram")?; + writeln!(&mut self.index_file, "start")?; + for (idx, rr_id) in ranking_rules_ids.iter().enumerate() { + writeln!(&mut self.index_file, "{idx}: {rr_id}")?; + } + writeln!(&mut self.index_file, "results")?; + for event in logger.events { + self.write_event(event)?; + } + writeln!(&mut self.index_file, "}}")?; + Ok(()) + } + + fn write_event(&mut self, e: SearchEvents) -> Result<()> { + match e { + SearchEvents::RankingRuleStartIteration { ranking_rule_idx, universe_len } => { + assert!(ranking_rule_idx == self.rr_action_counter.len()); + self.write_start_iteration(universe_len)?; + } + SearchEvents::RankingRuleNextBucket { ranking_rule_idx, universe_len, bucket_len } => { + assert!(ranking_rule_idx == self.rr_action_counter.len() - 1); + self.write_next_bucket(bucket_len, universe_len)?; + } + SearchEvents::RankingRuleSkipBucket { ranking_rule_idx, bucket_len } => { + assert!(ranking_rule_idx == self.rr_action_counter.len() - 1); + self.write_skip_bucket(bucket_len)?; + } + SearchEvents::RankingRuleEndIteration { ranking_rule_idx, universe_len: _ } => { + assert!(ranking_rule_idx == self.rr_action_counter.len() - 1); + self.write_end_iteration()?; + } + SearchEvents::ExtendResults { new } => { + self.write_extend_results(new)?; + } + SearchEvents::WordsGraph { query_graph } => self.write_words_graph(query_graph)?, + SearchEvents::ProximityGraph { graph } => self.write_rr_graph(&graph)?, + SearchEvents::ProximityPaths { paths } => { + self.write_rr_graph_paths::(paths)?; + } + SearchEvents::TypoGraph { graph } => self.write_rr_graph(&graph)?, + SearchEvents::TypoPaths { paths } => { + self.write_rr_graph_paths::(paths)?; + } + } + Ok(()) + } + fn write_query_graph(&mut self, qg: &QueryGraph) -> Result<()> { + writeln!(self.cur_file(), "direction: right")?; + for (node_id, node) in qg.nodes.iter() { + if matches!(node.data, QueryNodeData::Deleted) { + continue; + } + self.write_query_node(node_id, node)?; + + for edge in node.successors.iter() { + writeln!(self.cur_file(), "{node_id} -> {edge};\n").unwrap(); + } + } + Ok(()) + } + + fn write_start_iteration(&mut self, _universe_len: u64) -> Result<()> { + let parent_action_id = self.id_of_last_rr_action(); + self.push_new_rr_action(); + let self_action_id = self.id_of_last_rr_action(); + writeln!(&mut self.index_file, "{parent_action_id} -> {self_action_id} : start iteration")?; + writeln!( + &mut self.index_file, + "{self_action_id} {{ +style {{ +fill: \"#D8A7B1\" +}} +}}" + )?; + + Ok(()) + } + fn write_next_bucket(&mut self, bucket_len: u64, universe_len: u64) -> Result<()> { + let cur_action_id = self.id_of_last_rr_action(); + self.increment_cur_rr_action(); + let next_action_id = self.id_of_last_rr_action(); + writeln!( + &mut self.index_file, + "{cur_action_id} -> {next_action_id} : next bucket {bucket_len}/{universe_len}" + )?; + + Ok(()) + } + fn write_skip_bucket(&mut self, bucket_len: u64) -> Result<()> { + let cur_action_id = self.id_of_last_rr_action(); + self.increment_cur_rr_action(); + let next_action_id = self.id_of_last_rr_action(); + writeln!( + &mut self.index_file, + "{cur_action_id} -> {next_action_id} : skip bucket ({bucket_len})" + )?; + + Ok(()) + } + fn write_end_iteration(&mut self) -> Result<()> { + let cur_action_id = self.id_of_last_rr_action(); + self.pop_rr_action(); + let parent_action_id = self.id_of_last_rr_action(); + + writeln!(&mut self.index_file, "{cur_action_id} -> {parent_action_id} : end iteration",)?; + Ok(()) + } + fn write_extend_results(&mut self, new: Vec) -> Result<()> { + if new.is_empty() { + return Ok(()); + } + + let cur_action_id = self.id_of_last_rr_action(); + let results_id = self.id_of_extend_results(); + let docids = new.iter().collect::>(); + let len = new.len(); + + writeln!( + &mut self.index_file, + "{cur_action_id} -> {results_id} : \"add {len}\" +{results_id} {{ +tooltip: \"{docids:?}\" +style {{ +fill: \"#B6E2D3\" +}} +}} +" + )?; + Ok(()) + } + + fn write_query_node(&mut self, node_idx: Interned, node: &QueryNode) -> Result<()> { + let Self { + ctx, index_file, file_for_internal_state: active_ranking_rule_state_file, .. + } = self; + let file = if let Some(file) = active_ranking_rule_state_file.as_mut() { + file + } else { + index_file + }; + match &node.data { + QueryNodeData::Term(LocatedQueryTermSubset { + term_subset, + positions: _, + term_ids: _, + }) => { + writeln!( + file, + "{node_idx} : \"{}\" {{ + shape: class + max_nbr_typo: {}", + term_subset.description(ctx), + term_subset.max_nbr_typos(ctx) + )?; + + for w in term_subset.all_single_words_except_prefix_db(ctx)? { + let w = ctx.word_interner.get(w); + writeln!(file, "{w}: word")?; + } + for p in term_subset.all_phrases(ctx)? { + writeln!(file, "{}: phrase", p.description(ctx))?; + } + if let Some(w) = term_subset.use_prefix_db(ctx) { + let w = ctx.word_interner.get(w); + writeln!(file, "{w}: prefix db")?; + } + + writeln!(file, "}}")?; + } + QueryNodeData::Deleted => panic!(), + QueryNodeData::Start => { + writeln!(file, "{node_idx} : START")?; + } + QueryNodeData::End => { + writeln!(file, "{node_idx} : END")?; + } + } + Ok(()) + } + fn write_words_graph(&mut self, qg: QueryGraph) -> Result<()> { + self.make_new_file_for_internal_state_if_needed()?; + + self.write_query_graph(&qg)?; + + Ok(()) + } + fn write_rr_graph( + &mut self, + graph: &RankingRuleGraph, + ) -> Result<()> { + self.make_new_file_for_internal_state_if_needed()?; + + writeln!(self.cur_file(), "direction: right")?; + + writeln!(self.cur_file(), "Graph {{")?; + for (node_idx, node) in graph.query_graph.nodes.iter() { + if matches!(&node.data, QueryNodeData::Deleted) { + continue; + } + self.write_query_node(node_idx, node)?; + } + for (_edge_id, edge) in graph.edges_store.iter() { + let Some(edge) = edge else { continue }; + let Edge { source_node, dest_node, condition: details, cost, nodes_to_skip: _ } = edge; + + match &details { + None => { + writeln!( + self.cur_file(), + "{source_node} -> {dest_node} : \"always cost {cost}\"", + )?; + } + Some(condition) => { + writeln!( + self.cur_file(), + "{source_node} -> {dest_node} : \"{condition} cost {cost}\"", + cost = edge.cost, + )?; + } + } + } + writeln!(self.cur_file(), "}}")?; + + Ok(()) + } + + fn write_rr_graph_paths( + &mut self, + paths: Vec>>, + ) -> Result<()> { + self.make_new_file_for_internal_state_if_needed()?; + let file = if let Some(file) = self.file_for_internal_state.as_mut() { + file + } else { + &mut self.index_file + }; + writeln!(file, "Path {{")?; + for (path_idx, condition_indexes) in paths.iter().enumerate() { + writeln!(file, "{path_idx} {{")?; + for condition in condition_indexes.iter() { + writeln!(file, "{condition}")?; + } + for couple_edges in condition_indexes.windows(2) { + let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() }; + writeln!(file, "{src_edge_idx} -> {dest_edge_idx}")?; + } + writeln!(file, "}}")?; + } + writeln!(file, "}}")?; + Ok(()) + } +} diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 5bebd3bff..7b15bcaab 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -31,7 +31,7 @@ use db_cache::DatabaseCache; use graph_based_ranking_rule::{Proximity, Typo}; use heed::RoTxn; use interner::DedupInterner; -pub use logger::detailed::DetailedSearchLogger; +pub use logger::visual::VisualSearchLogger; pub use logger::{DefaultSearchLogger, SearchLogger}; use query_graph::{QueryGraph, QueryNode}; use query_term::{located_query_terms_from_string, LocatedQueryTerm, Phrase, QueryTerm}; diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index 00e759a28..6a9bfff93 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -82,7 +82,7 @@ impl PartialEq for Edge { /// It mostly describes how to: /// 1. Retrieve the set of edges (their cost and condition) between two nodes. /// 2. Compute the document ids satisfying a condition -pub trait RankingRuleGraphTrait: Sized { +pub trait RankingRuleGraphTrait: Sized + 'static { type Condition: Sized + Clone + PartialEq + Eq + Hash; /// Compute the document ids associated with the given edge condition, diff --git a/milli/src/search/new/words.rs b/milli/src/search/new/words.rs index c3ae07bcb..39bbc823d 100644 --- a/milli/src/search/new/words.rs +++ b/milli/src/search/new/words.rs @@ -65,6 +65,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words { return Ok(None); } let Some(query_graph) = &mut self.query_graph else { panic!() }; + logger.log_internal_state(query_graph); let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?;