Highlight all the matching words

This commit is contained in:
Kerollmops 2020-07-14 11:51:21 +02:00
parent 085c376655
commit 9ade00e27b
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 18 additions and 8 deletions

View File

@ -58,7 +58,7 @@ fn main() -> anyhow::Result<()> {
let before = Instant::now(); let before = Instant::now();
let query = result?; let query = result?;
let documents_ids = index.search(&rtxn, &query)?; let (_, documents_ids) = index.search(&rtxn, &query)?;
let headers = match index.headers(&rtxn)? { let headers = match index.headers(&rtxn)? {
Some(headers) => headers, Some(headers) => headers,
None => return Ok(()), None => return Ok(()),

View File

@ -152,14 +152,21 @@ async fn main() -> anyhow::Result<()> {
let before_search = Instant::now(); let before_search = Instant::now();
let rtxn = env_cloned.read_txn().unwrap(); let rtxn = env_cloned.read_txn().unwrap();
let documents_ids = index.search(&rtxn, &query.query).unwrap(); let (words, documents_ids) = index.search(&rtxn, &query.query).unwrap();
let mut body = Vec::new(); let mut body = Vec::new();
if let Some(headers) = index.headers(&rtxn).unwrap() { if let Some(headers) = index.headers(&rtxn).unwrap() {
// We write the headers // We write the headers
body.extend_from_slice(headers); body.extend_from_slice(headers);
let re = Regex::new(r"(?i)(hello)").unwrap(); let mut regex = format!(r"(?i)\b(");
let number_of_words = words.len();
words.into_iter().enumerate().for_each(|(i, w)| {
regex.push_str(&w);
if i != number_of_words - 1 { regex.push('|') }
});
regex.push_str(r")\b");
let re = Regex::new(&regex).unwrap();
for id in documents_ids { for id in documents_ids {
let content = index.documents.get(&rtxn, &BEU32::new(id)).unwrap(); let content = index.documents.get(&rtxn, &BEU32::new(id)).unwrap();

View File

@ -4,7 +4,7 @@ mod iter_shortest_paths;
mod query_tokens; mod query_tokens;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::{HashSet, HashMap};
use std::hash::BuildHasherDefault; use std::hash::BuildHasherDefault;
use std::time::Instant; use std::time::Instant;
@ -96,10 +96,10 @@ impl Index {
} }
} }
pub fn search(&self, rtxn: &heed::RoTxn, query: &str) -> anyhow::Result<Vec<DocumentId>> { pub fn search(&self, rtxn: &heed::RoTxn, query: &str) -> anyhow::Result<(HashSet<String>, Vec<DocumentId>)> {
let fst = match self.fst(rtxn)? { let fst = match self.fst(rtxn)? {
Some(fst) => fst, Some(fst) => fst,
None => return Ok(vec![]), None => return Ok(Default::default()),
}; };
let (lev0, lev1, lev2) = (&LEVDIST0, &LEVDIST1, &LEVDIST2); let (lev0, lev1, lev2) = (&LEVDIST0, &LEVDIST1, &LEVDIST2);
@ -342,7 +342,10 @@ impl Index {
} }
} }
debug!("{} candidates", documents.iter().map(RoaringBitmap::len).sum::<u64>()); debug!("{} final candidates", documents.iter().map(RoaringBitmap::len).sum::<u64>());
Ok(documents.iter().flatten().take(20).collect()) let words = words.into_iter().flatten().map(|(w, _)| String::from_utf8(w).unwrap()).collect();
let documents = documents.iter().flatten().take(20).collect();
Ok((words, documents))
} }
} }