diff --git a/Cargo.lock b/Cargo.lock index f0ebc2ff6..554a7dfea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6,6 +6,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccc9a9dd069569f212bc4330af9f17c4afb5e8ce185e83dbb14f1349dda18b10" +[[package]] +name = "aho-corasick" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86" +dependencies = [ + "memchr", +] + [[package]] name = "anyhow" version = "1.0.31" @@ -973,6 +982,7 @@ dependencies = [ "once_cell", "oxidized-mtbl", "rayon", + "regex", "roaring", "serde", "slice-group-by", @@ -1574,7 +1584,10 @@ version = "1.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" dependencies = [ + "aho-corasick", + "memchr", "regex-syntax", + "thread_local 1.0.1", ] [[package]] @@ -1792,7 +1805,7 @@ dependencies = [ "chrono", "log 0.4.8", "termcolor", - "thread_local", + "thread_local 0.3.4", ] [[package]] @@ -1907,6 +1920,15 @@ dependencies = [ "unreachable", ] +[[package]] +name = "thread_local" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +dependencies = [ + "lazy_static 1.4.0", +] + [[package]] name = "time" version = "0.1.43" diff --git a/Cargo.toml b/Cargo.toml index 4a991436f..4dbbbd776 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,9 @@ smallvec = "1.4.0" structopt = { version = "0.3.14", default-features = false } tempfile = "3.1.0" +# to highlight the documents +regex = "1.3.9" + # logging log = "0.4.8" stderrlog = "0.4.3" diff --git a/src/bin/serve.rs b/src/bin/serve.rs index 6be5438ae..f8e84555d 100644 --- a/src/bin/serve.rs +++ b/src/bin/serve.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::fs::File; use std::net::SocketAddr; use std::path::PathBuf; @@ -6,6 +7,7 @@ use std::time::Instant; use askama_warp::Template; use heed::EnvOpenOptions; +use regex::Regex; use serde::Deserialize; use structopt::StructOpt; use warp::{Filter, http::Response}; @@ -29,6 +31,10 @@ struct Opt { #[structopt(long = "db-size", default_value = "107374182400")] // 100 GB database_size: usize, + /// Disable document highlighting on the dashboard. + #[structopt(long)] + disable_highlighting: bool, + /// Verbose mode (-v, -vv, -vvv, etc.) #[structopt(short, long, parse(from_occurrences))] verbose: usize, @@ -138,6 +144,7 @@ async fn main() -> anyhow::Result<()> { } let env_cloned = env.clone(); + let disable_highlighting = opt.disable_highlighting; let query_route = warp::filters::method::post() .and(warp::path!("query")) .and(warp::body::json()) @@ -152,10 +159,20 @@ async fn main() -> anyhow::Result<()> { // We write the headers body.extend_from_slice(headers); + let re = Regex::new(r"(?i)(hello)").unwrap(); + for id in documents_ids { let content = index.documents.get(&rtxn, &BEU32::new(id)).unwrap(); let content = content.expect(&format!("could not find document {}", id)); - body.extend_from_slice(&content); + let content = std::str::from_utf8(content).unwrap(); + + let content = if disable_highlighting { + Cow::from(content) + } else { + re.replace_all(content, "$1") + }; + + body.extend_from_slice(content.as_bytes()); } }