From 640c7d748aa956329134955cd2600a593c95997c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 5 Nov 2020 13:58:07 +0100 Subject: [PATCH] Modify the highlight function to support any JSON type --- Cargo.lock | 2 -- Cargo.toml | 1 - http-ui/Cargo.lock | 1 - http-ui/Cargo.toml | 1 - http-ui/src/main.rs | 81 +++++++++++++++++++++++++++++---------------- 5 files changed, 53 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0d16ce9f7..63e30b65c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -447,7 +447,6 @@ checksum = "55e2e4c765aa53a0424761bf9f41aa7a6ac1efa87238f59560640e27fca028f2" dependencies = [ "autocfg", "hashbrown", - "serde", ] [[package]] @@ -604,7 +603,6 @@ dependencies = [ "grenad", "heed", "human_format", - "indexmap", "itertools", "jemallocator", "levenshtein_automata", diff --git a/Cargo.toml b/Cargo.toml index 70a65623f..441397275 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,6 @@ fxhash = "0.2.1" grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3eb7ad9" } heed = { version = "0.10.1", default-features = false, features = ["lmdb", "sync-read-txn"] } human_format = "1.0.3" -indexmap = { version = "1.6.0", features = ["serde-1"] } jemallocator = "0.3.2" levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } linked-hash-map = "0.5.3" diff --git a/http-ui/Cargo.lock b/http-ui/Cargo.lock index e3c265377..b6539085f 100644 --- a/http-ui/Cargo.lock +++ b/http-ui/Cargo.lock @@ -731,7 +731,6 @@ dependencies = [ "futures", "grenad", "heed", - "indexmap", "log", "memmap", "milli", diff --git a/http-ui/Cargo.toml b/http-ui/Cargo.toml index 184e05a98..7e28e1211 100644 --- a/http-ui/Cargo.toml +++ b/http-ui/Cargo.toml @@ -9,7 +9,6 @@ edition = "2018" anyhow = "1.0.28" grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3eb7ad9" } heed = "0.10.1" -indexmap = "1.6.0" memmap = "0.7.0" milli = { path = ".." 
} once_cell = "1.4.1" diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 2a9f9de69..bfd326837 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -14,10 +14,10 @@ use futures::stream; use futures::{FutureExt, StreamExt}; use grenad::CompressionType; use heed::EnvOpenOptions; -use indexmap::IndexMap; use once_cell::sync::OnceCell; use rayon::ThreadPool; use serde::{Serialize, Deserialize, Deserializer}; +use serde_json::{Map, Value}; use structopt::StructOpt; use tokio::fs::File as TFile; use tokio::io::AsyncWriteExt; @@ -27,7 +27,7 @@ use warp::{Filter, http::Response}; use milli::tokenizer::{simple_tokenizer, TokenType}; use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat}; -use milli::{Index, UpdateStore, SearchResult}; +use milli::{obkv_to_json, Index, UpdateStore, SearchResult}; static GLOBAL_THREAD_POOL: OnceCell = OnceCell::new(); @@ -117,19 +117,49 @@ pub struct IndexerOpt { pub indexing_jobs: Option, } -fn highlight_record(record: &mut IndexMap, words: &HashSet) { - for (_key, value) in record.iter_mut() { - let old_value = mem::take(value); - for (token_type, token) in simple_tokenizer(&old_value) { - if token_type == TokenType::Word { - let lowercase_token = token.to_lowercase(); - let to_highlight = words.contains(&lowercase_token); - if to_highlight { value.push_str("") } - value.push_str(token); - if to_highlight { value.push_str("") } - } else { - value.push_str(token); - } +fn highlight_record( + object: &mut Map, + words_to_highlight: &HashSet, + attributes_to_highlight: &HashSet, +) { + // TODO: do we need to create a string for elements that are not strings but need to be highlighted? 
+ fn highlight_value(value: Value, words_to_highlight: &HashSet) -> Value { + match value { + Value::Null => Value::Null, + Value::Bool(boolean) => Value::Bool(boolean), + Value::Number(number) => Value::Number(number), + Value::String(old_string) => { + let mut string = String::new(); + for (token_type, token) in simple_tokenizer(&old_string) { + if token_type == TokenType::Word { + let lowercase_token = token.to_lowercase(); + let to_highlight = words_to_highlight.contains(&lowercase_token); + if to_highlight { string.push_str("") } + string.push_str(token); + if to_highlight { string.push_str("") } + } else { + string.push_str(token); + } + } + Value::String(string) + }, + Value::Array(values) => { + Value::Array(values.into_iter() + .map(|v| highlight_value(v, words_to_highlight)) + .collect()) + }, + Value::Object(object) => { + Value::Object(object.into_iter() + .map(|(k, v)| (k, highlight_value(v, words_to_highlight))) + .collect()) + }, + } + } + + for (key, value) in object.iter_mut() { + if attributes_to_highlight.contains(key) { + let old_value = mem::take(value); + *value = highlight_value(old_value, words_to_highlight); } } } @@ -517,23 +547,18 @@ async fn main() -> anyhow::Result<()> { Some(fields) => Cow::Borrowed(fields), None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()), }; + let attributes_to_highlight = match index.searchable_fields(&rtxn).unwrap() { + Some(fields) => fields.iter().flat_map(|id| fields_ids_map.name(*id)).map(ToOwned::to_owned).collect(), + None => fields_ids_map.iter().map(|(_, name)| name).map(ToOwned::to_owned).collect(), + }; - for (_id, record) in index.documents(&rtxn, documents_ids).unwrap() { - let mut record = displayed_fields.iter() - .flat_map(|&id| record.get(id).map(|val| (id, val))) - .map(|(key_id, value)| { - let key = fields_ids_map.name(key_id).unwrap().to_owned(); - // TODO we must deserialize a Json Value and highlight it. 
- let value = serde_json::from_slice(value).unwrap(); - (key, value) - }) - .collect(); - + for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() { + let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap(); if !disable_highlighting { - highlight_record(&mut record, &found_words); + highlight_record(&mut object, &found_words, &attributes_to_highlight); } - documents.push(record); + documents.push(object); } Response::builder()