From 79d8555620c9c47e20a9cb12794d341b869e0835 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Sun, 24 Jun 2018 19:27:24 +0200 Subject: [PATCH] feat: Make a more debug oriented search tool --- raptor-search/Cargo.lock | 7 ++++ raptor-search/Cargo.toml | 1 + raptor-search/src/main.rs | 75 +++++++++++++++++++++++++++++++-------- 3 files changed, 69 insertions(+), 14 deletions(-) diff --git a/raptor-search/Cargo.lock b/raptor-search/Cargo.lock index 8a94a05a5..ddd348fd8 100644 --- a/raptor-search/Cargo.lock +++ b/raptor-search/Cargo.lock @@ -17,6 +17,11 @@ name = "cfg-if" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "elapsed" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "env_logger" version = "0.3.5" @@ -109,6 +114,7 @@ dependencies = [ name = "raptor-search" version = "0.1.0" dependencies = [ + "elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)", "raptor 0.1.0", @@ -169,6 +175,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bda13183df33055cbb84b847becce220d392df502ebe7a4a78d7021771ed94d0" "checksum byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "74c0b906e9446b0a2e4f760cdb3fa4b2c48cdc6db8766a845c54b6ff063fd2e9" "checksum cfg-if 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "405216fd8fe65f718daa7102ea808a946b6ce40c742998fbfd3463645552de18" +"checksum elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6f4e5af126dafd0741c2ad62d47f68b28602550102e5f0dd45c8a97fc8b49c29" "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" "checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)" = "" "checksum group-by 0.1.0 (git+https://github.com/Kerollmops/group-by.git)" = "" diff --git a/raptor-search/Cargo.toml b/raptor-search/Cargo.toml index 7b6e6d68d..983d95321 100644 --- a/raptor-search/Cargo.toml +++ b/raptor-search/Cargo.toml @@ -8,6 +8,7 @@ env_logger = { version = "0.3", default-features = false } raptor = { path = ".." } serde = "1.0" serde_derive = "1.0" +elapsed = "0.1" [dependencies.fst] git = "https://github.com/Kerollmops/fst.git" diff --git a/raptor-search/src/main.rs b/raptor-search/src/main.rs index ad3cfad8f..5bd8be798 100644 --- a/raptor-search/src/main.rs +++ b/raptor-search/src/main.rs @@ -1,22 +1,16 @@ extern crate env_logger; extern crate fst; extern crate raptor; +extern crate elapsed; -use std::env; +use std::{env, fs}; +use std::process::Command; +use std::io::{self, Write}; +use elapsed::measure_time; use fst::Streamer; -use raptor::{load_map, RankedStream, LevBuilder}; - -fn main() { - drop(env_logger::init()); - - let lev_builder = LevBuilder::new(); - let map = load_map("map.fst", "values.vecs").unwrap(); - - let query = env::args().nth(1).expect("Please enter query words!"); - let query = query.to_lowercase(); - - println!("Searching for: {:?}", query); +use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder}; +fn search(map: &DocIndexMap, lev_builder: &LevBuilder, query: &str) { let mut automatons = Vec::new(); for query in query.split_whitespace() { let lev = lev_builder.build_automaton(query); @@ -26,6 +20,59 @@ fn main() { let limit: Option = env::var("RAPTOR_OUTPUT_LIMIT").ok().and_then(|x| x.parse().ok()); let mut stream = RankedStream::new(&map, map.values(), automatons, limit.unwrap_or(20)); while let Some(document_id) = stream.next() { - println!("{:?}", document_id); + print!("{:?}", document_id); + + /* only here to debug ! + if let Ok(_) = fs::File::open("products.json_lines") { + let output = Command::new("rg") + .arg(document_id.to_string()) + .arg("products.json_lines") + .output(); + if let Ok(Ok(output)) = output.map(|o| String::from_utf8(o.stdout)) { + if let Some(line) = output.lines().next() { + let pattern = "\"title\":"; + if let Some(index) = line.find(pattern) { + let line: String = line[index..].chars().skip(pattern.len()).take(100).collect(); + print!(" => {}", line); + } + } + } + } + // */ + + println!(); + } +} + +fn main() { + drop(env_logger::init()); + + let (elapsed, (lev_builder, map)) = measure_time(|| { + let lev_builder = LevBuilder::new(); + let map = load_map("map.fst", "values.vecs").unwrap(); + (lev_builder, map) + }); + println!("Loaded in {}", elapsed); + + match env::args().nth(1) { + Some(query) => { + println!("Searching for: {:?}", query); + let query = query.to_lowercase(); + let (elapsed, _) = measure_time(|| search(&map, &lev_builder, &query)); + println!("Finished in {}", elapsed); + }, + None => loop { + print!("Searching for: "); + io::stdout().flush().unwrap(); + + let mut query = String::new(); + io::stdin().read_line(&mut query).unwrap(); + let query = query.trim().to_lowercase(); + + if query.is_empty() { break } + + let (elapsed, _) = measure_time(|| search(&map, &lev_builder, &query)); + println!("Finished in {}", elapsed); + }, } }