mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
Simplify implementation of the detailed (=visual) logger
This commit is contained in:
parent
96183e804a
commit
1f813a6f3b
@ -1,6 +1,6 @@
|
||||
use std::error::Error;
|
||||
use std::io::stdin;
|
||||
use std::time::Instant;
|
||||
use std::{error::Error, path::Path};
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::{
|
||||
@ -19,7 +19,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
program_name
|
||||
)
|
||||
});
|
||||
let detailed_logger = args.next();
|
||||
let detailed_logger_dir = args.next();
|
||||
let print_documents: bool =
|
||||
if let Some(arg) = args.next() { arg == "print-documents" } else { false };
|
||||
|
||||
@ -34,11 +34,11 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
let mut default_logger = DefaultSearchLogger;
|
||||
// FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible.
|
||||
// Workaround'd here by recreating the logger on each iteration of the loop
|
||||
let mut detailed_logger = detailed_logger
|
||||
let mut detailed_logger = detailed_logger_dir
|
||||
.as_ref()
|
||||
.map(|logger_dir| milli::DetailedSearchLogger::new(logger_dir));
|
||||
.map(|logger_dir| (milli::VisualSearchLogger::default(), logger_dir));
|
||||
let logger: &mut dyn SearchLogger<_> =
|
||||
if let Some(detailed_logger) = detailed_logger.as_mut() {
|
||||
if let Some((detailed_logger, _)) = detailed_logger.as_mut() {
|
||||
detailed_logger
|
||||
} else {
|
||||
&mut default_logger
|
||||
@ -61,8 +61,8 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
&mut DefaultSearchLogger,
|
||||
logger,
|
||||
)?;
|
||||
if let Some(logger) = &detailed_logger {
|
||||
logger.write_d2_description(&mut ctx);
|
||||
if let Some((logger, dir)) = detailed_logger {
|
||||
logger.finish(&mut ctx, Path::new(dir))?;
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
|
||||
|
@ -79,7 +79,7 @@ pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
||||
use fxhash::{FxHasher32, FxHasher64};
|
||||
pub use grenad::CompressionType;
|
||||
pub use search::new::{
|
||||
execute_search, DefaultSearchLogger, DetailedSearchLogger, SearchContext, SearchLogger,
|
||||
execute_search, DefaultSearchLogger, SearchContext, SearchLogger, VisualSearchLogger,
|
||||
};
|
||||
use serde_json::Value;
|
||||
pub use {charabia as tokenizer, heed};
|
||||
|
@ -270,6 +270,9 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
}
|
||||
})?;
|
||||
|
||||
logger.log_internal_state(graph);
|
||||
logger.log_internal_state(&good_paths);
|
||||
|
||||
// We modify the next query graph so that it only contains the subgraph
|
||||
// that was used to compute this bucket
|
||||
// But we only do it in case the bucket length is >1, because otherwise
|
||||
|
@ -1,563 +0,0 @@
|
||||
use std::any::Any;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
// use rand::random;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::new::graph_based_ranking_rule::Typo;
|
||||
use crate::search::new::interner::{Interned, MappedInterner};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::ranking_rule_graph::{
|
||||
DeadEndsCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph,
|
||||
RankingRuleGraphTrait, TypoCondition, TypoGraph,
|
||||
};
|
||||
use crate::search::new::ranking_rules::BoxRankingRule;
|
||||
use crate::search::new::sort::Sort;
|
||||
use crate::search::new::words::Words;
|
||||
use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
|
||||
|
||||
pub enum SearchEvents {
|
||||
RankingRuleStartIteration {
|
||||
ranking_rule_idx: usize,
|
||||
query: QueryGraph,
|
||||
universe: RoaringBitmap,
|
||||
time: Instant,
|
||||
},
|
||||
RankingRuleNextBucket {
|
||||
ranking_rule_idx: usize,
|
||||
universe: RoaringBitmap,
|
||||
candidates: RoaringBitmap,
|
||||
time: Instant,
|
||||
},
|
||||
RankingRuleEndIteration {
|
||||
ranking_rule_idx: usize,
|
||||
universe: RoaringBitmap,
|
||||
time: Instant,
|
||||
},
|
||||
ExtendResults {
|
||||
new: Vec<u32>,
|
||||
},
|
||||
WordsState {
|
||||
query_graph: QueryGraph,
|
||||
},
|
||||
ProximityState {
|
||||
graph: RankingRuleGraph<ProximityGraph>,
|
||||
paths: Vec<Vec<Interned<ProximityCondition>>>,
|
||||
dead_ends_cache: DeadEndsCache<ProximityCondition>,
|
||||
universe: RoaringBitmap,
|
||||
costs: MappedInterner<QueryNode, Vec<u64>>,
|
||||
cost: u64,
|
||||
},
|
||||
TypoState {
|
||||
graph: RankingRuleGraph<TypoGraph>,
|
||||
paths: Vec<Vec<Interned<TypoCondition>>>,
|
||||
dead_ends_cache: DeadEndsCache<TypoCondition>,
|
||||
universe: RoaringBitmap,
|
||||
costs: MappedInterner<QueryNode, Vec<u64>>,
|
||||
cost: u64,
|
||||
},
|
||||
RankingRuleSkipBucket {
|
||||
ranking_rule_idx: usize,
|
||||
candidates: RoaringBitmap,
|
||||
time: Instant,
|
||||
},
|
||||
}
|
||||
|
||||
pub struct DetailedSearchLogger {
|
||||
folder_path: PathBuf,
|
||||
initial_query: Option<QueryGraph>,
|
||||
initial_query_time: Option<Instant>,
|
||||
query_for_universe: Option<QueryGraph>,
|
||||
initial_universe: Option<RoaringBitmap>,
|
||||
ranking_rules_ids: Option<Vec<String>>,
|
||||
events: Vec<SearchEvents>,
|
||||
}
|
||||
impl DetailedSearchLogger {
|
||||
pub fn new(folder_path: &str) -> Self {
|
||||
Self {
|
||||
folder_path: PathBuf::new().join(folder_path),
|
||||
initial_query: None,
|
||||
initial_query_time: None,
|
||||
query_for_universe: None,
|
||||
initial_universe: None,
|
||||
ranking_rules_ids: None,
|
||||
events: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
fn initial_query(&mut self, query: &QueryGraph) {
|
||||
self.initial_query = Some(query.clone());
|
||||
self.initial_query_time = Some(Instant::now());
|
||||
}
|
||||
|
||||
fn query_for_initial_universe(&mut self, query: &QueryGraph) {
|
||||
self.query_for_universe = Some(query.clone());
|
||||
}
|
||||
|
||||
fn initial_universe(&mut self, universe: &RoaringBitmap) {
|
||||
self.initial_universe = Some(universe.clone());
|
||||
}
|
||||
fn ranking_rules(&mut self, rr: &[BoxRankingRule<QueryGraph>]) {
|
||||
self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect());
|
||||
}
|
||||
|
||||
fn start_iteration_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
query: &QueryGraph,
|
||||
universe: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleStartIteration {
|
||||
ranking_rule_idx,
|
||||
query: query.clone(),
|
||||
universe: universe.clone(),
|
||||
time: Instant::now(),
|
||||
})
|
||||
}
|
||||
|
||||
fn next_bucket_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
candidates: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleNextBucket {
|
||||
ranking_rule_idx,
|
||||
universe: universe.clone(),
|
||||
candidates: candidates.clone(),
|
||||
time: Instant::now(),
|
||||
})
|
||||
}
|
||||
fn skip_bucket_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
candidates: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleSkipBucket {
|
||||
ranking_rule_idx,
|
||||
candidates: candidates.clone(),
|
||||
time: Instant::now(),
|
||||
})
|
||||
}
|
||||
|
||||
fn end_iteration_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleEndIteration {
|
||||
ranking_rule_idx,
|
||||
universe: universe.clone(),
|
||||
time: Instant::now(),
|
||||
})
|
||||
}
|
||||
fn add_to_results(&mut self, docids: &[u32]) {
|
||||
self.events.push(SearchEvents::ExtendResults { new: docids.to_vec() });
|
||||
}
|
||||
|
||||
/// Logs the internal state of the ranking rule
|
||||
fn log_ranking_rule_state<'ctx>(&mut self, state: &(dyn Any + 'ctx)) {
|
||||
if let Some(_words) = state.downcast_ref::<Words>() {
|
||||
} else if let Some(_sort) = state.downcast_ref::<Sort<'ctx, QueryGraph>>() {
|
||||
} else if let Some(_typo) = state.downcast_ref::<Typo>() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DetailedSearchLogger {
|
||||
pub fn write_d2_description(&self, ctx: &mut SearchContext) {
|
||||
let mut prev_time = self.initial_query_time.unwrap();
|
||||
let mut timestamp = vec![];
|
||||
fn activated_id(timestamp: &[usize]) -> String {
|
||||
let mut s = String::new();
|
||||
s.push('0');
|
||||
for t in timestamp.iter() {
|
||||
s.push_str(&format!("{t}"));
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
let index_path = self.folder_path.join("index.d2");
|
||||
let mut file = std::fs::File::create(index_path).unwrap();
|
||||
writeln!(&mut file, "direction: right").unwrap();
|
||||
writeln!(&mut file, "Initial Query Graph: {{").unwrap();
|
||||
let initial_query_graph = self.initial_query.as_ref().unwrap();
|
||||
Self::query_graph_d2_description(ctx, initial_query_graph, &mut file);
|
||||
writeln!(&mut file, "}}").unwrap();
|
||||
|
||||
writeln!(&mut file, "Query Graph Used To Compute Universe: {{").unwrap();
|
||||
let query_graph_for_universe = self.query_for_universe.as_ref().unwrap();
|
||||
Self::query_graph_d2_description(ctx, query_graph_for_universe, &mut file);
|
||||
writeln!(&mut file, "}}").unwrap();
|
||||
|
||||
let initial_universe = self.initial_universe.as_ref().unwrap();
|
||||
writeln!(&mut file, "Initial Universe Length {}", initial_universe.len()).unwrap();
|
||||
|
||||
writeln!(&mut file, "Control Flow Between Ranking Rules: {{").unwrap();
|
||||
writeln!(&mut file, "shape: sequence_diagram").unwrap();
|
||||
for (idx, rr_id) in self.ranking_rules_ids.as_ref().unwrap().iter().enumerate() {
|
||||
writeln!(&mut file, "{idx}: {rr_id}").unwrap();
|
||||
}
|
||||
writeln!(&mut file, "results").unwrap();
|
||||
// writeln!(&mut file, "time").unwrap();
|
||||
for event in self.events.iter() {
|
||||
match event {
|
||||
SearchEvents::RankingRuleStartIteration { ranking_rule_idx, time, .. } => {
|
||||
let _elapsed = time.duration_since(prev_time);
|
||||
prev_time = *time;
|
||||
let parent_activated_id = activated_id(×tamp);
|
||||
timestamp.push(0);
|
||||
let self_activated_id = activated_id(×tamp);
|
||||
// writeln!(&mut file, "time.{self_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap();
|
||||
if *ranking_rule_idx != 0 {
|
||||
let parent_ranking_rule_idx = ranking_rule_idx - 1;
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{parent_ranking_rule_idx}.{parent_activated_id} -> {ranking_rule_idx}.{self_activated_id} : start iteration",
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{ranking_rule_idx}.{self_activated_id} {{
|
||||
style {{
|
||||
fill: \"#D8A7B1\"
|
||||
}}
|
||||
}}"
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::RankingRuleNextBucket {
|
||||
ranking_rule_idx,
|
||||
time,
|
||||
universe,
|
||||
candidates,
|
||||
} => {
|
||||
let _elapsed = time.duration_since(prev_time);
|
||||
prev_time = *time;
|
||||
let old_activated_id = activated_id(×tamp);
|
||||
// writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap();
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
let next_activated_id = activated_id(×tamp);
|
||||
writeln!(&mut file,
|
||||
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : next bucket {}/{}", candidates.len(), universe.len())
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::RankingRuleSkipBucket { ranking_rule_idx, candidates, time } => {
|
||||
let _elapsed = time.duration_since(prev_time);
|
||||
prev_time = *time;
|
||||
let old_activated_id = activated_id(×tamp);
|
||||
// writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap();
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
let next_activated_id = activated_id(×tamp);
|
||||
let len = candidates.len();
|
||||
writeln!(&mut file,
|
||||
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : skip bucket ({len})",)
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::RankingRuleEndIteration { ranking_rule_idx, time, .. } => {
|
||||
let _elapsed = time.duration_since(prev_time);
|
||||
prev_time = *time;
|
||||
let cur_activated_id = activated_id(×tamp);
|
||||
// writeln!(&mut file, "time.{cur_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap();
|
||||
|
||||
timestamp.pop();
|
||||
let parent_activated_id = activated_id(×tamp);
|
||||
let parent_ranking_rule = if *ranking_rule_idx == 0 {
|
||||
"start".to_owned()
|
||||
} else {
|
||||
format!("{}.{parent_activated_id}", ranking_rule_idx - 1)
|
||||
};
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{ranking_rule_idx}.{cur_activated_id} -> {parent_ranking_rule} : end iteration",
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::ExtendResults { new } => {
|
||||
if new.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
let cur_activated_id = activated_id(×tamp);
|
||||
let docids = new.iter().collect::<Vec<_>>();
|
||||
let len = new.len();
|
||||
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{cur_ranking_rule}.{cur_activated_id} -> results.{cur_ranking_rule}{cur_activated_id} : \"add {len}\"
|
||||
results.{cur_ranking_rule}{cur_activated_id} {{
|
||||
tooltip: \"{docids:?}\"
|
||||
style {{
|
||||
fill: \"#B6E2D3\"
|
||||
}}
|
||||
}}
|
||||
"
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::WordsState { query_graph } => {
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
let cur_activated_id = activated_id(×tamp);
|
||||
*timestamp.last_mut().unwrap() -= 1;
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::query_graph_d2_description(ctx, query_graph, &mut new_file);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
}}"
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::ProximityState {
|
||||
graph,
|
||||
paths,
|
||||
dead_ends_cache,
|
||||
universe,
|
||||
costs,
|
||||
cost,
|
||||
} => {
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
let cur_activated_id = activated_id(×tamp);
|
||||
*timestamp.last_mut().unwrap() -= 1;
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::ranking_rule_graph_d2_description(
|
||||
ctx,
|
||||
graph,
|
||||
paths,
|
||||
dead_ends_cache,
|
||||
costs.clone(),
|
||||
&mut new_file,
|
||||
);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
tooltip: \"cost {cost}, universe len: {}\"
|
||||
}}",
|
||||
universe.len()
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::TypoState {
|
||||
graph,
|
||||
paths,
|
||||
dead_ends_cache,
|
||||
universe,
|
||||
costs,
|
||||
cost,
|
||||
} => {
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
let cur_activated_id = activated_id(×tamp);
|
||||
*timestamp.last_mut().unwrap() -= 1;
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::ranking_rule_graph_d2_description(
|
||||
ctx,
|
||||
graph,
|
||||
paths,
|
||||
dead_ends_cache,
|
||||
costs.clone(),
|
||||
&mut new_file,
|
||||
);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
tooltip: \"cost {cost}, universe len: {}\"
|
||||
}}",
|
||||
universe.len()
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
writeln!(&mut file, "}}").unwrap();
|
||||
}
|
||||
|
||||
fn query_node_d2_desc(
|
||||
ctx: &mut SearchContext,
|
||||
node_idx: Interned<QueryNode>,
|
||||
node: &QueryNode,
|
||||
_costs: &[u64],
|
||||
file: &mut File,
|
||||
) {
|
||||
match &node.data {
|
||||
QueryNodeData::Term(LocatedQueryTermSubset {
|
||||
term_subset,
|
||||
positions: _,
|
||||
term_ids: _,
|
||||
}) => {
|
||||
writeln!(
|
||||
file,
|
||||
"{node_idx} : \"{}\" {{
|
||||
shape: class
|
||||
max_nbr_typo: {}",
|
||||
term_subset.description(ctx),
|
||||
term_subset.max_nbr_typos(ctx)
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
for w in term_subset.all_single_words_except_prefix_db(ctx).unwrap() {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "{w}: word").unwrap();
|
||||
}
|
||||
for p in term_subset.all_phrases(ctx).unwrap() {
|
||||
writeln!(file, "{}: phrase", p.description(ctx)).unwrap();
|
||||
}
|
||||
if let Some(w) = term_subset.use_prefix_db(ctx) {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "{w}: prefix db").unwrap();
|
||||
}
|
||||
|
||||
writeln!(file, "}}").unwrap();
|
||||
}
|
||||
QueryNodeData::Deleted => panic!(),
|
||||
QueryNodeData::Start => {
|
||||
writeln!(file, "{node_idx} : START").unwrap();
|
||||
}
|
||||
QueryNodeData::End => {
|
||||
writeln!(file, "{node_idx} : END").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
fn query_graph_d2_description(
|
||||
ctx: &mut SearchContext,
|
||||
query_graph: &QueryGraph,
|
||||
file: &mut File,
|
||||
) {
|
||||
writeln!(file, "direction: right").unwrap();
|
||||
for (node_id, node) in query_graph.nodes.iter() {
|
||||
if matches!(node.data, QueryNodeData::Deleted) {
|
||||
continue;
|
||||
}
|
||||
Self::query_node_d2_desc(ctx, node_id, node, &[], file);
|
||||
|
||||
for edge in node.successors.iter() {
|
||||
writeln!(file, "{node_id} -> {edge};\n").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(
|
||||
ctx: &mut SearchContext,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
paths: &[Vec<Interned<R::Condition>>],
|
||||
_dead_ends_cache: &DeadEndsCache<R::Condition>,
|
||||
costs: MappedInterner<QueryNode, Vec<u64>>,
|
||||
file: &mut File,
|
||||
) {
|
||||
writeln!(file, "direction: right").unwrap();
|
||||
|
||||
writeln!(file, "Proximity Graph {{").unwrap();
|
||||
for (node_idx, node) in graph.query_graph.nodes.iter() {
|
||||
if matches!(&node.data, QueryNodeData::Deleted) {
|
||||
continue;
|
||||
}
|
||||
let costs = &costs.get(node_idx);
|
||||
Self::query_node_d2_desc(ctx, node_idx, node, costs, file);
|
||||
}
|
||||
for (_edge_id, edge) in graph.edges_store.iter() {
|
||||
let Some(edge) = edge else { continue };
|
||||
let Edge { source_node, dest_node, condition: details, cost, nodes_to_skip: _ } = edge;
|
||||
|
||||
match &details {
|
||||
None => {
|
||||
writeln!(file, "{source_node} -> {dest_node} : \"always cost {cost}\"",)
|
||||
.unwrap();
|
||||
}
|
||||
Some(condition) => {
|
||||
// let condition = graph.conditions_interner.get(*condition);
|
||||
writeln!(
|
||||
file,
|
||||
"{source_node} -> {dest_node} : \"{condition} cost {cost}\"",
|
||||
cost = edge.cost,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
|
||||
// writeln!(file, "costs {{").unwrap();
|
||||
// Self::paths_d2_description(graph, paths, file);
|
||||
// writeln!(file, "}}").unwrap();
|
||||
|
||||
writeln!(file, "Paths {{").unwrap();
|
||||
Self::paths_d2_description(ctx, graph, paths, file);
|
||||
writeln!(file, "}}").unwrap();
|
||||
|
||||
// writeln!(file, "Dead-end couples of conditions {{").unwrap();
|
||||
// for (i, (e1, e2)) in dead_end_paths_cache.condition_couples.iter().enumerate() {
|
||||
// writeln!(file, "{i} : \"\" {{").unwrap();
|
||||
// Self::condition_d2_description(ctx, graph, e1, file);
|
||||
// for e2 in e2.iter() {
|
||||
// Self::condition_d2_description(ctx, graph, e2, file);
|
||||
// writeln!(file, "{e1} -- {e2}").unwrap();
|
||||
// }
|
||||
// writeln!(file, "}}").unwrap();
|
||||
// }
|
||||
// writeln!(file, "}}").unwrap();
|
||||
|
||||
// writeln!(file, "Dead-end edges {{").unwrap();
|
||||
// for condition in dead_end_paths_cache.conditions.iter() {
|
||||
// writeln!(file, "{condition}").unwrap();
|
||||
// }
|
||||
// writeln!(file, "}}").unwrap();
|
||||
|
||||
// writeln!(file, "Dead-end prefixes {{").unwrap();
|
||||
// writeln!(file, "}}").unwrap();
|
||||
}
|
||||
fn condition_d2_description<R: RankingRuleGraphTrait>(
|
||||
ctx: &mut SearchContext,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
condition_id: Interned<R::Condition>,
|
||||
file: &mut File,
|
||||
) {
|
||||
let condition = graph.conditions_interner.get(condition_id);
|
||||
writeln!(
|
||||
file,
|
||||
"{condition_id} {{
|
||||
shape: class
|
||||
label
|
||||
}}",
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
fn paths_d2_description<R: RankingRuleGraphTrait>(
|
||||
ctx: &mut SearchContext,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
paths: &[Vec<Interned<R::Condition>>],
|
||||
file: &mut File,
|
||||
) {
|
||||
for (path_idx, condition_indexes) in paths.iter().enumerate() {
|
||||
writeln!(file, "{path_idx} {{").unwrap();
|
||||
for condition in condition_indexes.iter() {
|
||||
Self::condition_d2_description(ctx, graph, *condition, file);
|
||||
}
|
||||
for couple_edges in condition_indexes.windows(2) {
|
||||
let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() };
|
||||
writeln!(file, "{src_edge_idx} -> {dest_edge_idx}").unwrap();
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,84 +1,28 @@
|
||||
// #[cfg(test)]
|
||||
pub mod detailed;
|
||||
pub mod visual;
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::graph_based_ranking_rule::Typo;
|
||||
use super::ranking_rules::BoxRankingRule;
|
||||
use super::sort::Sort;
|
||||
use super::words::Words;
|
||||
use super::{RankingRule, RankingRuleQueryTrait};
|
||||
|
||||
/// Trait for structure logging the execution of a search query.
|
||||
pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
/// Logs the initial query
|
||||
fn initial_query(&mut self, query: &Q);
|
||||
fn initial_query(&mut self, _query: &Q);
|
||||
|
||||
/// Logs the value of the initial set of all candidates
|
||||
fn initial_universe(&mut self, universe: &RoaringBitmap);
|
||||
fn initial_universe(&mut self, _universe: &RoaringBitmap);
|
||||
|
||||
/// Logs the query that was used to compute the set of all candidates
|
||||
fn query_for_initial_universe(&mut self, query: &Q);
|
||||
fn query_for_initial_universe(&mut self, _query: &Q);
|
||||
|
||||
/// Logs the ranking rules used to perform the search query
|
||||
fn ranking_rules(&mut self, rr: &[BoxRankingRule<Q>]);
|
||||
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<Q>]);
|
||||
|
||||
/// Logs the start of a ranking rule's iteration.
|
||||
fn start_iteration_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
ranking_rule: &dyn RankingRule<Q>,
|
||||
query: &Q,
|
||||
universe: &RoaringBitmap,
|
||||
);
|
||||
/// Logs the end of the computation of a ranking rule bucket
|
||||
fn next_bucket_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
ranking_rule: &dyn RankingRule<Q>,
|
||||
universe: &RoaringBitmap,
|
||||
candidates: &RoaringBitmap,
|
||||
);
|
||||
/// Logs the skipping of a ranking rule bucket
|
||||
fn skip_bucket_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
ranking_rule: &dyn RankingRule<Q>,
|
||||
candidates: &RoaringBitmap,
|
||||
);
|
||||
/// Logs the end of a ranking rule's iteration.
|
||||
fn end_iteration_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
ranking_rule: &dyn RankingRule<Q>,
|
||||
universe: &RoaringBitmap,
|
||||
);
|
||||
/// Logs the addition of document ids to the final results
|
||||
fn add_to_results(&mut self, docids: &[u32]);
|
||||
|
||||
/// Logs the internal state of the ranking rule
|
||||
fn log_ranking_rule_state<'ctx>(&mut self, rr: &(dyn Any + 'ctx)) {
|
||||
if let Some(_words) = rr.downcast_ref::<Words>() {
|
||||
} else if let Some(_sort) = rr.downcast_ref::<Sort<'ctx, Q>>() {
|
||||
} else if let Some(_typo) = rr.downcast_ref::<Typo>() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A dummy [`SearchLogger`] which does nothing.
|
||||
pub struct DefaultSearchLogger;
|
||||
|
||||
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
fn initial_query(&mut self, _query: &Q) {}
|
||||
|
||||
fn query_for_initial_universe(&mut self, _query: &Q) {}
|
||||
|
||||
fn initial_universe(&mut self, _universe: &RoaringBitmap) {}
|
||||
|
||||
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<Q>]) {}
|
||||
|
||||
fn start_iteration_ranking_rule(
|
||||
&mut self,
|
||||
_ranking_rule_idx: usize,
|
||||
@ -87,7 +31,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
_universe: &RoaringBitmap,
|
||||
) {
|
||||
}
|
||||
|
||||
/// Logs the end of the computation of a ranking rule bucket
|
||||
fn next_bucket_ranking_rule(
|
||||
&mut self,
|
||||
_ranking_rule_idx: usize,
|
||||
@ -96,6 +40,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
_candidates: &RoaringBitmap,
|
||||
) {
|
||||
}
|
||||
/// Logs the skipping of a ranking rule bucket
|
||||
fn skip_bucket_ranking_rule(
|
||||
&mut self,
|
||||
_ranking_rule_idx: usize,
|
||||
@ -103,7 +48,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
_candidates: &RoaringBitmap,
|
||||
) {
|
||||
}
|
||||
|
||||
/// Logs the end of a ranking rule's iteration.
|
||||
fn end_iteration_ranking_rule(
|
||||
&mut self,
|
||||
_ranking_rule_idx: usize,
|
||||
@ -111,6 +56,26 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
_universe: &RoaringBitmap,
|
||||
) {
|
||||
}
|
||||
/// Logs the addition of document ids to the final results
|
||||
fn add_to_results(&mut self, _docids: &[u32]);
|
||||
|
||||
/// Logs an internal state in the search algorithms
|
||||
fn log_internal_state(&mut self, _rr: &dyn Any);
|
||||
}
|
||||
|
||||
/// A dummy [`SearchLogger`] which does nothing.
|
||||
pub struct DefaultSearchLogger;
|
||||
|
||||
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
fn initial_query(&mut self, _query: &Q) {}
|
||||
|
||||
fn initial_universe(&mut self, _universe: &RoaringBitmap) {}
|
||||
|
||||
fn query_for_initial_universe(&mut self, _query: &Q) {}
|
||||
|
||||
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<Q>]) {}
|
||||
|
||||
fn add_to_results(&mut self, _docids: &[u32]) {}
|
||||
|
||||
fn log_internal_state(&mut self, _rr: &dyn Any) {}
|
||||
}
|
||||
|
525
milli/src/search/new/logger/visual.rs
Normal file
525
milli/src/search/new/logger/visual.rs
Normal file
@ -0,0 +1,525 @@
|
||||
use std::any::Any;
|
||||
use std::fs::File;
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Instant;
|
||||
|
||||
// use rand::random;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::ranking_rule_graph::{
|
||||
Edge, ProximityCondition, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
|
||||
TypoCondition, TypoGraph,
|
||||
};
|
||||
use crate::search::new::ranking_rules::BoxRankingRule;
|
||||
use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
|
||||
use crate::Result;
|
||||
|
||||
pub enum SearchEvents {
|
||||
RankingRuleStartIteration { ranking_rule_idx: usize, universe_len: u64 },
|
||||
RankingRuleNextBucket { ranking_rule_idx: usize, universe_len: u64, bucket_len: u64 },
|
||||
RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 },
|
||||
RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 },
|
||||
ExtendResults { new: Vec<u32> },
|
||||
WordsGraph { query_graph: QueryGraph },
|
||||
ProximityGraph { graph: RankingRuleGraph<ProximityGraph> },
|
||||
ProximityPaths { paths: Vec<Vec<Interned<ProximityCondition>>> },
|
||||
TypoGraph { graph: RankingRuleGraph<TypoGraph> },
|
||||
TypoPaths { paths: Vec<Vec<Interned<TypoCondition>>> },
|
||||
}
|
||||
|
||||
enum Location {
|
||||
Words,
|
||||
Typo,
|
||||
Proximity,
|
||||
Other,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VisualSearchLogger {
|
||||
initial_query: Option<QueryGraph>,
|
||||
initial_query_time: Option<Instant>,
|
||||
query_for_universe: Option<QueryGraph>,
|
||||
initial_universe: Option<RoaringBitmap>,
|
||||
ranking_rules_ids: Option<Vec<String>>,
|
||||
events: Vec<SearchEvents>,
|
||||
location: Vec<Location>,
|
||||
}
|
||||
|
||||
impl SearchLogger<QueryGraph> for VisualSearchLogger {
|
||||
fn initial_query(&mut self, query: &QueryGraph) {
|
||||
self.initial_query = Some(query.clone());
|
||||
self.initial_query_time = Some(Instant::now());
|
||||
}
|
||||
|
||||
fn query_for_initial_universe(&mut self, query: &QueryGraph) {
|
||||
self.query_for_universe = Some(query.clone());
|
||||
}
|
||||
|
||||
fn initial_universe(&mut self, universe: &RoaringBitmap) {
|
||||
self.initial_universe = Some(universe.clone());
|
||||
}
|
||||
fn ranking_rules(&mut self, rr: &[BoxRankingRule<QueryGraph>]) {
|
||||
self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect());
|
||||
}
|
||||
|
||||
fn start_iteration_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
_query: &QueryGraph,
|
||||
universe: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleStartIteration {
|
||||
ranking_rule_idx,
|
||||
universe_len: universe.len(),
|
||||
});
|
||||
self.location.push(match ranking_rule.id().as_str() {
|
||||
"words" => Location::Words,
|
||||
"typo" => Location::Typo,
|
||||
"proximity" => Location::Proximity,
|
||||
_ => Location::Other,
|
||||
});
|
||||
}
|
||||
|
||||
fn next_bucket_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
bucket: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleNextBucket {
|
||||
ranking_rule_idx,
|
||||
universe_len: universe.len(),
|
||||
bucket_len: bucket.len(),
|
||||
});
|
||||
}
|
||||
fn skip_bucket_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
bucket: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleSkipBucket {
|
||||
ranking_rule_idx,
|
||||
bucket_len: bucket.len(),
|
||||
})
|
||||
}
|
||||
|
||||
fn end_iteration_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleEndIteration {
|
||||
ranking_rule_idx,
|
||||
universe_len: universe.len(),
|
||||
});
|
||||
self.location.pop();
|
||||
}
|
||||
fn add_to_results(&mut self, docids: &[u32]) {
|
||||
self.events.push(SearchEvents::ExtendResults { new: docids.to_vec() });
|
||||
}
|
||||
|
||||
/// Logs the internal state of the ranking rule
|
||||
fn log_internal_state(&mut self, state: &dyn Any) {
|
||||
let Some(location) = self.location.last() else { return };
|
||||
match location {
|
||||
Location::Words => {
|
||||
if let Some(query_graph) = state.downcast_ref::<QueryGraph>() {
|
||||
self.events.push(SearchEvents::WordsGraph { query_graph: query_graph.clone() });
|
||||
}
|
||||
}
|
||||
Location::Typo => {
|
||||
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<TypoGraph>>() {
|
||||
self.events.push(SearchEvents::TypoGraph { graph: graph.clone() });
|
||||
}
|
||||
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<TypoCondition>>>>() {
|
||||
self.events.push(SearchEvents::TypoPaths { paths: paths.clone() });
|
||||
}
|
||||
}
|
||||
Location::Proximity => {
|
||||
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<ProximityGraph>>() {
|
||||
self.events.push(SearchEvents::ProximityGraph { graph: graph.clone() });
|
||||
}
|
||||
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<ProximityCondition>>>>()
|
||||
{
|
||||
self.events.push(SearchEvents::ProximityPaths { paths: paths.clone() });
|
||||
}
|
||||
}
|
||||
Location::Other => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl VisualSearchLogger {
|
||||
pub fn finish<'ctx>(self, ctx: &'ctx mut SearchContext<'ctx>, folder: &Path) -> Result<()> {
|
||||
let mut f = DetailedLoggerFinish::new(ctx, folder)?;
|
||||
f.finish(self)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct DetailedLoggerFinish<'ctx> {
|
||||
ctx: &'ctx mut SearchContext<'ctx>,
|
||||
/// The folder where all the files should be printed
|
||||
folder_path: PathBuf,
|
||||
/// The main file visualising the search request
|
||||
index_file: BufWriter<File>,
|
||||
/// A vector of counters where each counter at index i represents the number of times
|
||||
/// that the ranking rule at idx i-1 was called since its last call to `start_iteration`.
|
||||
/// This is used to uniquely identify a point in the sequence diagram.
|
||||
rr_action_counter: Vec<usize>,
|
||||
/// The file storing information about the internal state of the latest active ranking rule
|
||||
file_for_internal_state: Option<BufWriter<File>>,
|
||||
}
|
||||
|
||||
impl<'ctx> DetailedLoggerFinish<'ctx> {
|
||||
fn cur_file(&mut self) -> &mut BufWriter<File> {
|
||||
if let Some(file) = self.file_for_internal_state.as_mut() {
|
||||
file
|
||||
} else {
|
||||
&mut self.index_file
|
||||
}
|
||||
}
|
||||
fn pop_rr_action(&mut self) {
|
||||
self.file_for_internal_state = None;
|
||||
self.rr_action_counter.pop();
|
||||
}
|
||||
fn push_new_rr_action(&mut self) {
|
||||
self.file_for_internal_state = None;
|
||||
self.rr_action_counter.push(0);
|
||||
}
|
||||
fn increment_cur_rr_action(&mut self) {
|
||||
self.file_for_internal_state = None;
|
||||
if let Some(c) = self.rr_action_counter.last_mut() {
|
||||
*c += 1;
|
||||
}
|
||||
}
|
||||
fn id_of_timestamp(&self) -> String {
|
||||
let mut s = String::new();
|
||||
for t in self.rr_action_counter.iter() {
|
||||
s.push_str(&format!("{t}_"));
|
||||
}
|
||||
s
|
||||
}
|
||||
fn id_of_extend_results(&self) -> String {
|
||||
let mut s = String::new();
|
||||
s.push_str("results.\"");
|
||||
s.push_str(&self.id_of_timestamp());
|
||||
s.push('"');
|
||||
s
|
||||
}
|
||||
fn id_of_last_rr_action(&self) -> String {
|
||||
let mut s = String::new();
|
||||
let rr_id = if self.rr_action_counter.is_empty() {
|
||||
"start.\"".to_owned()
|
||||
} else {
|
||||
format!("{}.\"", self.rr_action_counter.len() - 1)
|
||||
};
|
||||
s.push_str(&rr_id);
|
||||
s.push_str(&self.id_of_timestamp());
|
||||
s.push('"');
|
||||
s
|
||||
}
|
||||
fn make_new_file_for_internal_state_if_needed(&mut self) -> Result<()> {
|
||||
if self.file_for_internal_state.is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
let timestamp = self.id_of_timestamp();
|
||||
let id = self.id_of_last_rr_action();
|
||||
let new_file_path = self.folder_path.join(format!("{timestamp}.d2"));
|
||||
self.file_for_internal_state = Some(BufWriter::new(File::create(new_file_path)?));
|
||||
|
||||
writeln!(
|
||||
&mut self.index_file,
|
||||
"{id} {{
|
||||
link: \"{timestamp}.d2.svg\"
|
||||
}}"
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
fn new(ctx: &'ctx mut SearchContext<'ctx>, folder_path: &Path) -> Result<Self> {
|
||||
let index_path = folder_path.join("index.d2");
|
||||
let index_file = BufWriter::new(File::create(index_path)?);
|
||||
|
||||
Ok(Self {
|
||||
ctx,
|
||||
folder_path: folder_path.to_owned(),
|
||||
index_file,
|
||||
rr_action_counter: vec![],
|
||||
file_for_internal_state: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn finish(&mut self, logger: VisualSearchLogger) -> Result<()> {
|
||||
writeln!(&mut self.index_file, "direction: right")?;
|
||||
if let Some(qg) = logger.initial_query {
|
||||
writeln!(&mut self.index_file, "Initial Query Graph: {{")?;
|
||||
self.write_query_graph(&qg)?;
|
||||
writeln!(&mut self.index_file, "}}")?;
|
||||
}
|
||||
if let Some(qg) = logger.query_for_universe {
|
||||
writeln!(&mut self.index_file, "Query Graph Used To Compute Universe: {{")?;
|
||||
self.write_query_graph(&qg)?;
|
||||
writeln!(&mut self.index_file, "}}")?;
|
||||
}
|
||||
let Some(ranking_rules_ids) = logger.ranking_rules_ids else { return Ok(()) };
|
||||
writeln!(&mut self.index_file, "Control Flow Between Ranking Rules: {{")?;
|
||||
writeln!(&mut self.index_file, "shape: sequence_diagram")?;
|
||||
writeln!(&mut self.index_file, "start")?;
|
||||
for (idx, rr_id) in ranking_rules_ids.iter().enumerate() {
|
||||
writeln!(&mut self.index_file, "{idx}: {rr_id}")?;
|
||||
}
|
||||
writeln!(&mut self.index_file, "results")?;
|
||||
for event in logger.events {
|
||||
self.write_event(event)?;
|
||||
}
|
||||
writeln!(&mut self.index_file, "}}")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_event(&mut self, e: SearchEvents) -> Result<()> {
|
||||
match e {
|
||||
SearchEvents::RankingRuleStartIteration { ranking_rule_idx, universe_len } => {
|
||||
assert!(ranking_rule_idx == self.rr_action_counter.len());
|
||||
self.write_start_iteration(universe_len)?;
|
||||
}
|
||||
SearchEvents::RankingRuleNextBucket { ranking_rule_idx, universe_len, bucket_len } => {
|
||||
assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
|
||||
self.write_next_bucket(bucket_len, universe_len)?;
|
||||
}
|
||||
SearchEvents::RankingRuleSkipBucket { ranking_rule_idx, bucket_len } => {
|
||||
assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
|
||||
self.write_skip_bucket(bucket_len)?;
|
||||
}
|
||||
SearchEvents::RankingRuleEndIteration { ranking_rule_idx, universe_len: _ } => {
|
||||
assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
|
||||
self.write_end_iteration()?;
|
||||
}
|
||||
SearchEvents::ExtendResults { new } => {
|
||||
self.write_extend_results(new)?;
|
||||
}
|
||||
SearchEvents::WordsGraph { query_graph } => self.write_words_graph(query_graph)?,
|
||||
SearchEvents::ProximityGraph { graph } => self.write_rr_graph(&graph)?,
|
||||
SearchEvents::ProximityPaths { paths } => {
|
||||
self.write_rr_graph_paths::<ProximityGraph>(paths)?;
|
||||
}
|
||||
SearchEvents::TypoGraph { graph } => self.write_rr_graph(&graph)?,
|
||||
SearchEvents::TypoPaths { paths } => {
|
||||
self.write_rr_graph_paths::<TypoGraph>(paths)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
fn write_query_graph(&mut self, qg: &QueryGraph) -> Result<()> {
|
||||
writeln!(self.cur_file(), "direction: right")?;
|
||||
for (node_id, node) in qg.nodes.iter() {
|
||||
if matches!(node.data, QueryNodeData::Deleted) {
|
||||
continue;
|
||||
}
|
||||
self.write_query_node(node_id, node)?;
|
||||
|
||||
for edge in node.successors.iter() {
|
||||
writeln!(self.cur_file(), "{node_id} -> {edge};\n").unwrap();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_start_iteration(&mut self, _universe_len: u64) -> Result<()> {
|
||||
let parent_action_id = self.id_of_last_rr_action();
|
||||
self.push_new_rr_action();
|
||||
let self_action_id = self.id_of_last_rr_action();
|
||||
writeln!(&mut self.index_file, "{parent_action_id} -> {self_action_id} : start iteration")?;
|
||||
writeln!(
|
||||
&mut self.index_file,
|
||||
"{self_action_id} {{
|
||||
style {{
|
||||
fill: \"#D8A7B1\"
|
||||
}}
|
||||
}}"
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
fn write_next_bucket(&mut self, bucket_len: u64, universe_len: u64) -> Result<()> {
|
||||
let cur_action_id = self.id_of_last_rr_action();
|
||||
self.increment_cur_rr_action();
|
||||
let next_action_id = self.id_of_last_rr_action();
|
||||
writeln!(
|
||||
&mut self.index_file,
|
||||
"{cur_action_id} -> {next_action_id} : next bucket {bucket_len}/{universe_len}"
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
fn write_skip_bucket(&mut self, bucket_len: u64) -> Result<()> {
|
||||
let cur_action_id = self.id_of_last_rr_action();
|
||||
self.increment_cur_rr_action();
|
||||
let next_action_id = self.id_of_last_rr_action();
|
||||
writeln!(
|
||||
&mut self.index_file,
|
||||
"{cur_action_id} -> {next_action_id} : skip bucket ({bucket_len})"
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
fn write_end_iteration(&mut self) -> Result<()> {
|
||||
let cur_action_id = self.id_of_last_rr_action();
|
||||
self.pop_rr_action();
|
||||
let parent_action_id = self.id_of_last_rr_action();
|
||||
|
||||
writeln!(&mut self.index_file, "{cur_action_id} -> {parent_action_id} : end iteration",)?;
|
||||
Ok(())
|
||||
}
|
||||
fn write_extend_results(&mut self, new: Vec<u32>) -> Result<()> {
|
||||
if new.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let cur_action_id = self.id_of_last_rr_action();
|
||||
let results_id = self.id_of_extend_results();
|
||||
let docids = new.iter().collect::<Vec<_>>();
|
||||
let len = new.len();
|
||||
|
||||
writeln!(
|
||||
&mut self.index_file,
|
||||
"{cur_action_id} -> {results_id} : \"add {len}\"
|
||||
{results_id} {{
|
||||
tooltip: \"{docids:?}\"
|
||||
style {{
|
||||
fill: \"#B6E2D3\"
|
||||
}}
|
||||
}}
|
||||
"
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_query_node(&mut self, node_idx: Interned<QueryNode>, node: &QueryNode) -> Result<()> {
|
||||
let Self {
|
||||
ctx, index_file, file_for_internal_state: active_ranking_rule_state_file, ..
|
||||
} = self;
|
||||
let file = if let Some(file) = active_ranking_rule_state_file.as_mut() {
|
||||
file
|
||||
} else {
|
||||
index_file
|
||||
};
|
||||
match &node.data {
|
||||
QueryNodeData::Term(LocatedQueryTermSubset {
|
||||
term_subset,
|
||||
positions: _,
|
||||
term_ids: _,
|
||||
}) => {
|
||||
writeln!(
|
||||
file,
|
||||
"{node_idx} : \"{}\" {{
|
||||
shape: class
|
||||
max_nbr_typo: {}",
|
||||
term_subset.description(ctx),
|
||||
term_subset.max_nbr_typos(ctx)
|
||||
)?;
|
||||
|
||||
for w in term_subset.all_single_words_except_prefix_db(ctx)? {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "{w}: word")?;
|
||||
}
|
||||
for p in term_subset.all_phrases(ctx)? {
|
||||
writeln!(file, "{}: phrase", p.description(ctx))?;
|
||||
}
|
||||
if let Some(w) = term_subset.use_prefix_db(ctx) {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "{w}: prefix db")?;
|
||||
}
|
||||
|
||||
writeln!(file, "}}")?;
|
||||
}
|
||||
QueryNodeData::Deleted => panic!(),
|
||||
QueryNodeData::Start => {
|
||||
writeln!(file, "{node_idx} : START")?;
|
||||
}
|
||||
QueryNodeData::End => {
|
||||
writeln!(file, "{node_idx} : END")?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
fn write_words_graph(&mut self, qg: QueryGraph) -> Result<()> {
|
||||
self.make_new_file_for_internal_state_if_needed()?;
|
||||
|
||||
self.write_query_graph(&qg)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
fn write_rr_graph<R: RankingRuleGraphTrait>(
|
||||
&mut self,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
) -> Result<()> {
|
||||
self.make_new_file_for_internal_state_if_needed()?;
|
||||
|
||||
writeln!(self.cur_file(), "direction: right")?;
|
||||
|
||||
writeln!(self.cur_file(), "Graph {{")?;
|
||||
for (node_idx, node) in graph.query_graph.nodes.iter() {
|
||||
if matches!(&node.data, QueryNodeData::Deleted) {
|
||||
continue;
|
||||
}
|
||||
self.write_query_node(node_idx, node)?;
|
||||
}
|
||||
for (_edge_id, edge) in graph.edges_store.iter() {
|
||||
let Some(edge) = edge else { continue };
|
||||
let Edge { source_node, dest_node, condition: details, cost, nodes_to_skip: _ } = edge;
|
||||
|
||||
match &details {
|
||||
None => {
|
||||
writeln!(
|
||||
self.cur_file(),
|
||||
"{source_node} -> {dest_node} : \"always cost {cost}\"",
|
||||
)?;
|
||||
}
|
||||
Some(condition) => {
|
||||
writeln!(
|
||||
self.cur_file(),
|
||||
"{source_node} -> {dest_node} : \"{condition} cost {cost}\"",
|
||||
cost = edge.cost,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
writeln!(self.cur_file(), "}}")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_rr_graph_paths<R: RankingRuleGraphTrait>(
|
||||
&mut self,
|
||||
paths: Vec<Vec<Interned<R::Condition>>>,
|
||||
) -> Result<()> {
|
||||
self.make_new_file_for_internal_state_if_needed()?;
|
||||
let file = if let Some(file) = self.file_for_internal_state.as_mut() {
|
||||
file
|
||||
} else {
|
||||
&mut self.index_file
|
||||
};
|
||||
writeln!(file, "Path {{")?;
|
||||
for (path_idx, condition_indexes) in paths.iter().enumerate() {
|
||||
writeln!(file, "{path_idx} {{")?;
|
||||
for condition in condition_indexes.iter() {
|
||||
writeln!(file, "{condition}")?;
|
||||
}
|
||||
for couple_edges in condition_indexes.windows(2) {
|
||||
let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() };
|
||||
writeln!(file, "{src_edge_idx} -> {dest_edge_idx}")?;
|
||||
}
|
||||
writeln!(file, "}}")?;
|
||||
}
|
||||
writeln!(file, "}}")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -31,7 +31,7 @@ use db_cache::DatabaseCache;
|
||||
use graph_based_ranking_rule::{Proximity, Typo};
|
||||
use heed::RoTxn;
|
||||
use interner::DedupInterner;
|
||||
pub use logger::detailed::DetailedSearchLogger;
|
||||
pub use logger::visual::VisualSearchLogger;
|
||||
pub use logger::{DefaultSearchLogger, SearchLogger};
|
||||
use query_graph::{QueryGraph, QueryNode};
|
||||
use query_term::{located_query_terms_from_string, LocatedQueryTerm, Phrase, QueryTerm};
|
||||
|
@ -82,7 +82,7 @@ impl<E> PartialEq for Edge<E> {
|
||||
/// It mostly describes how to:
|
||||
/// 1. Retrieve the set of edges (their cost and condition) between two nodes.
|
||||
/// 2. Compute the document ids satisfying a condition
|
||||
pub trait RankingRuleGraphTrait: Sized {
|
||||
pub trait RankingRuleGraphTrait: Sized + 'static {
|
||||
type Condition: Sized + Clone + PartialEq + Eq + Hash;
|
||||
|
||||
/// Compute the document ids associated with the given edge condition,
|
||||
|
@ -65,6 +65,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
|
||||
return Ok(None);
|
||||
}
|
||||
let Some(query_graph) = &mut self.query_graph else { panic!() };
|
||||
logger.log_internal_state(query_graph);
|
||||
|
||||
let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user