Add more debug timing measurements

This commit is contained in:
Kerollmops 2020-06-10 21:35:01 +02:00
parent 66a4b26811
commit 6ca3579cc0
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
2 changed files with 28 additions and 4 deletions

View File

@ -1,4 +1,6 @@
use std::cmp; use std::cmp;
use std::time::Instant;
use pathfinding::directed::dijkstra::dijkstra; use pathfinding::directed::dijkstra::dijkstra;
const ONE_ATTRIBUTE: u32 = 1000; const ONE_ATTRIBUTE: u32 = 1000;
@ -95,6 +97,8 @@ impl Iterator for BestProximity {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
let mut output: Option<(u32, Vec<Vec<u32>>)> = None; let mut output: Option<(u32, Vec<Vec<u32>>)> = None;
let before = Instant::now();
loop { loop {
let result = dijkstra( let result = dijkstra(
&Path::new(&self.positions)?, &Path::new(&self.positions)?,
@ -128,6 +132,8 @@ impl Iterator for BestProximity {
} }
} }
eprintln!("BestProximity::next() took {:.02?}", before.elapsed());
if let Some((proximity, _)) = output.as_ref() { if let Some((proximity, _)) = output.as_ref() {
self.best_proximity = proximity + 1; self.best_proximity = proximity + 1;
} }

View File

@ -4,6 +4,7 @@ mod query_tokens;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::hash::BuildHasherDefault; use std::hash::BuildHasherDefault;
use std::time::Instant;
use cow_utils::CowUtils; use cow_utils::CowUtils;
use fst::{IntoStreamer, Streamer}; use fst::{IntoStreamer, Streamer};
@ -90,6 +91,7 @@ impl Index {
let mut words_positions = Vec::new(); let mut words_positions = Vec::new();
let mut positions = Vec::new(); let mut positions = Vec::new();
let before = Instant::now();
for (word, is_prefix, dfa) in dfas { for (word, is_prefix, dfa) in dfas {
let mut count = 0; let mut count = 0;
@ -117,17 +119,24 @@ impl Index {
positions.push(union_positions.iter().collect()); positions.push(union_positions.iter().collect());
} }
eprintln!("Retrieving words positions took {:.02?}", before.elapsed());
let mut documents = Vec::new(); let mut documents = Vec::new();
for (_proximity, positions) in BestProximity::new(positions) { 'outer: for (proximity, positions) in BestProximity::new(positions) {
let same_prox_before = Instant::now();
let mut same_proximity_union = RoaringBitmap::default(); let mut same_proximity_union = RoaringBitmap::default();
for positions in positions { for positions in positions {
let before = Instant::now();
let mut intersect_docids: Option<RoaringBitmap> = None; let mut intersect_docids: Option<RoaringBitmap> = None;
for ((word, is_prefix, dfa), pos) in words_positions.iter().zip(positions) { for ((word, is_prefix, dfa), pos) in words_positions.iter().zip(positions.clone()) {
let mut count = 0; let mut count = 0;
let mut union_docids = RoaringBitmap::default(); let mut union_docids = RoaringBitmap::default();
let before = Instant::now();
// TODO re-enable the prefixes system // TODO re-enable the prefixes system
if false && word.len() <= 4 && *is_prefix { if false && word.len() <= 4 && *is_prefix {
let mut key = word.as_bytes()[..word.len().min(5)].to_vec(); let mut key = word.as_bytes()[..word.len().min(5)].to_vec();
@ -151,24 +160,33 @@ impl Index {
} }
} }
let _ = count; let before_intersect = Instant::now();
match &mut intersect_docids { match &mut intersect_docids {
Some(left) => left.intersect_with(&union_docids), Some(left) => left.intersect_with(&union_docids),
None => intersect_docids = Some(union_docids), None => intersect_docids = Some(union_docids),
} }
eprintln!("retrieving {} word took {:.02?} and took {:.02?} to intersect",
count, before.elapsed(), before_intersect.elapsed());
} }
eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
proximity, positions, before.elapsed(),
intersect_docids.as_ref().map_or(0, |rb| rb.len()));
if let Some(intersect_docids) = intersect_docids { if let Some(intersect_docids) = intersect_docids {
same_proximity_union.union_with(&intersect_docids); same_proximity_union.union_with(&intersect_docids);
} }
} }
eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
documents.push(same_proximity_union); documents.push(same_proximity_union);
// We found enough documents we can stop here // We found enough documents we can stop here
if documents.iter().map(RoaringBitmap::len).sum::<u64>() >= 20 { if documents.iter().map(RoaringBitmap::len).sum::<u64>() >= 20 {
break break 'outer;
} }
} }