mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 03:55:07 +08:00
Replace the regex highlighting by a simple algorithm
This commit is contained in:
parent
4873abe145
commit
6508d497ce
24
Cargo.lock
generated
24
Cargo.lock
generated
@ -6,15 +6,6 @@ version = "0.2.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ccc9a9dd069569f212bc4330af9f17c4afb5e8ce185e83dbb14f1349dda18b10"
|
checksum = "ccc9a9dd069569f212bc4330af9f17c4afb5e8ce185e83dbb14f1349dda18b10"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "aho-corasick"
|
|
||||||
version = "0.7.13"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
|
|
||||||
dependencies = [
|
|
||||||
"memchr",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anyhow"
|
name = "anyhow"
|
||||||
version = "1.0.31"
|
version = "1.0.31"
|
||||||
@ -997,7 +988,6 @@ dependencies = [
|
|||||||
"once_cell",
|
"once_cell",
|
||||||
"oxidized-mtbl",
|
"oxidized-mtbl",
|
||||||
"rayon",
|
"rayon",
|
||||||
"regex",
|
|
||||||
"roaring",
|
"roaring",
|
||||||
"serde",
|
"serde",
|
||||||
"slice-group-by",
|
"slice-group-by",
|
||||||
@ -1601,10 +1591,7 @@ version = "1.3.9"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
|
checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick",
|
|
||||||
"memchr",
|
|
||||||
"regex-syntax",
|
"regex-syntax",
|
||||||
"thread_local 1.0.1",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -1822,7 +1809,7 @@ dependencies = [
|
|||||||
"chrono",
|
"chrono",
|
||||||
"log 0.4.8",
|
"log 0.4.8",
|
||||||
"termcolor",
|
"termcolor",
|
||||||
"thread_local 0.3.4",
|
"thread_local",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -1937,15 +1924,6 @@ dependencies = [
|
|||||||
"unreachable",
|
"unreachable",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thread_local"
|
|
||||||
version = "1.0.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
|
|
||||||
dependencies = [
|
|
||||||
"lazy_static 1.4.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time"
|
name = "time"
|
||||||
version = "0.1.43"
|
version = "0.1.43"
|
||||||
|
@ -28,9 +28,6 @@ smallvec = "1.4.0"
|
|||||||
structopt = { version = "0.3.14", default-features = false }
|
structopt = { version = "0.3.14", default-features = false }
|
||||||
tempfile = "3.1.0"
|
tempfile = "3.1.0"
|
||||||
|
|
||||||
# to highlight the documents
|
|
||||||
regex = "1.3.9"
|
|
||||||
|
|
||||||
# logging
|
# logging
|
||||||
log = "0.4.8"
|
log = "0.4.8"
|
||||||
stderrlog = "0.4.3"
|
stderrlog = "0.4.3"
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
@ -7,8 +8,8 @@ use std::time::Instant;
|
|||||||
|
|
||||||
use askama_warp::Template;
|
use askama_warp::Template;
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use regex::Regex;
|
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
use slice_group_by::StrGroupBy;
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
use warp::{Filter, http::Response};
|
use warp::{Filter, http::Response};
|
||||||
|
|
||||||
@ -44,6 +45,18 @@ struct Opt {
|
|||||||
http_listen_addr: String,
|
http_listen_addr: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn highlight_string(string: &str, words: &HashSet<String>) -> String {
|
||||||
|
let mut output = String::new();
|
||||||
|
for token in string.linear_group_by_key(|c| c.is_alphanumeric()) {
|
||||||
|
let lowercase_token = token.to_lowercase();
|
||||||
|
let to_highlight = words.contains(&lowercase_token);
|
||||||
|
if to_highlight { output.push_str("<mark>") }
|
||||||
|
output.push_str(token);
|
||||||
|
if to_highlight { output.push_str("</mark>") }
|
||||||
|
}
|
||||||
|
output
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Template)]
|
#[derive(Template)]
|
||||||
#[template(path = "index.html")]
|
#[template(path = "index.html")]
|
||||||
struct IndexTemplate {
|
struct IndexTemplate {
|
||||||
@ -173,15 +186,6 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
// We write the headers
|
// We write the headers
|
||||||
body.extend_from_slice(headers);
|
body.extend_from_slice(headers);
|
||||||
|
|
||||||
let mut regex = format!(r"(?i)\b(");
|
|
||||||
let number_of_words = words.len();
|
|
||||||
words.into_iter().enumerate().for_each(|(i, w)| {
|
|
||||||
regex.push_str(&w);
|
|
||||||
if i != number_of_words - 1 { regex.push('|') }
|
|
||||||
});
|
|
||||||
regex.push_str(r")\b");
|
|
||||||
let re = Regex::new(&regex).unwrap();
|
|
||||||
|
|
||||||
for id in documents_ids {
|
for id in documents_ids {
|
||||||
let content = index.documents.get(&rtxn, &BEU32::new(id)).unwrap();
|
let content = index.documents.get(&rtxn, &BEU32::new(id)).unwrap();
|
||||||
let content = content.expect(&format!("could not find document {}", id));
|
let content = content.expect(&format!("could not find document {}", id));
|
||||||
@ -190,7 +194,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
let content = if disable_highlighting {
|
let content = if disable_highlighting {
|
||||||
Cow::from(content)
|
Cow::from(content)
|
||||||
} else {
|
} else {
|
||||||
re.replace_all(content, "<mark>$1</mark>")
|
Cow::from(highlight_string(content, &words))
|
||||||
};
|
};
|
||||||
|
|
||||||
body.extend_from_slice(content.as_bytes());
|
body.extend_from_slice(content.as_bytes());
|
||||||
|
Loading…
Reference in New Issue
Block a user