From 4fa10753c1db35136dd48e2b527ee1be8ddc80b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 14 Jan 2019 21:18:46 +0100 Subject: [PATCH] chore: Display more stats infos --- Cargo.toml | 2 +- src/rank/query_builder.rs | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 02c5e0b30..668edc8b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ authors = ["Kerollmops "] bincode = "1.0" byteorder = "1.2" crossbeam = "0.6" +elapsed = "0.1" fst = "0.3" hashbrown = { version = "0.1", features = ["serde"] } lazy_static = "1.1" @@ -42,7 +43,6 @@ nightly = ["hashbrown/nightly", "group-by/nightly"] [dev-dependencies] csv = "1.0" -elapsed = "0.1" env_logger = "0.6" jemallocator = "0.1" quickcheck = "0.8" diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index b8686b071..3710f1971 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -93,6 +93,7 @@ where D: Deref, op_builder.union() }; + let mut number_matches = 0; let mut matches = HashMap::new(); while let Some((input, indexed_values)) = stream.next() { @@ -104,6 +105,7 @@ where D: Deref, let doc_indexes = &self.view.index().positive.indexes(); let doc_indexes = &doc_indexes[iv.value as usize]; + number_matches += doc_indexes.len(); for doc_index in doc_indexes { let match_ = Match { query_index: iv.index as u32, @@ -117,7 +119,8 @@ where D: Deref, } } - info!("{} documents to classify", matches.len()); + info!("{} total documents to classify", matches.len()); + info!("{} total matches to classify", number_matches); matches.into_iter().map(|(i, m)| Document::from_matches(i, m)).collect() } @@ -135,15 +138,19 @@ where D: Deref, return builder.query(query, range); } - let mut documents = self.query_all(query); + let (elapsed, mut documents) = elapsed::measure_time(|| self.query_all(query)); + info!("query_all took {}", elapsed); + let mut groups = vec![documents.as_mut_slice()]; let view = &self.view; - 'criteria: for criterion in self.criteria.as_ref() { + 'criteria: for (ci, criterion) in self.criteria.as_ref().iter().enumerate() { let tmp_groups = mem::replace(&mut groups, Vec::new()); let mut documents_seen = 0; for group in tmp_groups { + info!("criterion {}, documents group of size {}", ci, group.len()); + // if this group does not overlap with the requested range, // push it without sorting and splitting it if documents_seen + group.len() < range.start { @@ -152,7 +159,11 @@ where D: Deref, continue; } - group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view)); + let (elapsed, ()) = elapsed::measure_time(|| { + group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view)); + }); + + info!("criterion {} sort took {}", ci, elapsed); for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) { documents_seen += group.len();