Use the regex crate to highlight "hello"

This commit is contained in:
Kerollmops 2020-07-14 11:27:46 +02:00
parent dd385ad05b
commit 085c376655
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 44 additions and 2 deletions

24
Cargo.lock generated
View File

@@ -6,6 +6,15 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccc9a9dd069569f212bc4330af9f17c4afb5e8ce185e83dbb14f1349dda18b10" checksum = "ccc9a9dd069569f212bc4330af9f17c4afb5e8ce185e83dbb14f1349dda18b10"
[[package]]
name = "aho-corasick"
version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "anyhow" name = "anyhow"
version = "1.0.31" version = "1.0.31"
@@ -973,6 +982,7 @@ dependencies = [
"once_cell", "once_cell",
"oxidized-mtbl", "oxidized-mtbl",
"rayon", "rayon",
"regex",
"roaring", "roaring",
"serde", "serde",
"slice-group-by", "slice-group-by",
@@ -1574,7 +1584,10 @@ version = "1.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
dependencies = [ dependencies = [
"aho-corasick",
"memchr",
"regex-syntax", "regex-syntax",
"thread_local 1.0.1",
] ]
[[package]] [[package]]
@@ -1792,7 +1805,7 @@ dependencies = [
"chrono", "chrono",
"log 0.4.8", "log 0.4.8",
"termcolor", "termcolor",
"thread_local", "thread_local 0.3.4",
] ]
[[package]] [[package]]
@@ -1907,6 +1920,15 @@ dependencies = [
"unreachable", "unreachable",
] ]
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
dependencies = [
"lazy_static 1.4.0",
]
[[package]] [[package]]
name = "time" name = "time"
version = "0.1.43" version = "0.1.43"

View File

@@ -27,6 +27,9 @@ smallvec = "1.4.0"
structopt = { version = "0.3.14", default-features = false } structopt = { version = "0.3.14", default-features = false }
tempfile = "3.1.0" tempfile = "3.1.0"
# to highlight the documents
regex = "1.3.9"
# logging # logging
log = "0.4.8" log = "0.4.8"
stderrlog = "0.4.3" stderrlog = "0.4.3"

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::fs::File; use std::fs::File;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
@@ -6,6 +7,7 @@ use std::time::Instant;
use askama_warp::Template; use askama_warp::Template;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use regex::Regex;
use serde::Deserialize; use serde::Deserialize;
use structopt::StructOpt; use structopt::StructOpt;
use warp::{Filter, http::Response}; use warp::{Filter, http::Response};
@@ -29,6 +31,10 @@ struct Opt {
#[structopt(long = "db-size", default_value = "107374182400")] // 100 GB #[structopt(long = "db-size", default_value = "107374182400")] // 100 GB
database_size: usize, database_size: usize,
/// Disable document highlighting on the dashboard.
#[structopt(long)]
disable_highlighting: bool,
/// Verbose mode (-v, -vv, -vvv, etc.) /// Verbose mode (-v, -vv, -vvv, etc.)
#[structopt(short, long, parse(from_occurrences))] #[structopt(short, long, parse(from_occurrences))]
verbose: usize, verbose: usize,
@@ -138,6 +144,7 @@ async fn main() -> anyhow::Result<()> {
} }
let env_cloned = env.clone(); let env_cloned = env.clone();
let disable_highlighting = opt.disable_highlighting;
let query_route = warp::filters::method::post() let query_route = warp::filters::method::post()
.and(warp::path!("query")) .and(warp::path!("query"))
.and(warp::body::json()) .and(warp::body::json())
@@ -152,10 +159,20 @@ async fn main() -> anyhow::Result<()> {
// We write the headers // We write the headers
body.extend_from_slice(headers); body.extend_from_slice(headers);
let re = Regex::new(r"(?i)(hello)").unwrap();
for id in documents_ids { for id in documents_ids {
let content = index.documents.get(&rtxn, &BEU32::new(id)).unwrap(); let content = index.documents.get(&rtxn, &BEU32::new(id)).unwrap();
let content = content.expect(&format!("could not find document {}", id)); let content = content.expect(&format!("could not find document {}", id));
body.extend_from_slice(&content); let content = std::str::from_utf8(content).unwrap();
let content = if disable_highlighting {
Cow::from(content)
} else {
re.replace_all(content, "<mark>$1</mark>")
};
body.extend_from_slice(content.as_bytes());
} }
} }