From 85bf5d113ca03cc95583d1186d65f4d0e6826f5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 5 Nov 2019 16:40:34 +0100 Subject: [PATCH] Fix a highlighting problem when query was longer than original text --- meilidb-core/src/query_builder.rs | 9 ++++++--- meilidb-core/src/raw_indexer.rs | 20 +++++--------------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs index 04301c574..42a292c6f 100644 --- a/meilidb-core/src/query_builder.rs +++ b/meilidb-core/src/query_builder.rs @@ -1,9 +1,9 @@ use hashbrown::HashMap; use std::convert::TryFrom; -use std::mem; use std::ops::Range; use std::rc::Rc; use std::time::{Duration, Instant}; +use std::{cmp, mem}; use fst::{IntoStreamer, Streamer}; use sdset::SetBuf; @@ -178,7 +178,7 @@ fn fetch_raw_documents( let distance = dfa.eval(input).to_u8(); let is_exact = *is_exact && distance == 0 && input.len() == *query_len; - let covered_area = if query.len() > input.len() { + let covered_area = if *query_len > input.len() { input.len() } else { prefix_damerau_levenshtein(query.as_bytes(), input).1 @@ -202,10 +202,13 @@ fn fetch_raw_documents( is_exact, }; + let covered_area = u16::try_from(covered_area).unwrap_or(u16::max_value()); + let covered_area = cmp::min(covered_area, di.char_length); + let highlight = Highlight { attribute: di.attribute, char_index: di.char_index, - char_length: u16::try_from(covered_area).unwrap_or(u16::max_value()), + char_length: covered_area, }; tmp_matches.push((di.document_id, id, match_, highlight)); diff --git a/meilidb-core/src/raw_indexer.rs b/meilidb-core/src/raw_indexer.rs index 3e0f212f7..f4304a33a 100644 --- a/meilidb-core/src/raw_indexer.rs +++ b/meilidb-core/src/raw_indexer.rs @@ -133,30 +133,20 @@ fn index_token( .or_insert_with(Vec::new) .push(docindex); docs_words.entry(id).or_insert_with(Vec::new).push(word); - } - None => return false, - } - if !lower.contains(is_cjk) { - let 
unidecoded = deunicode_with_tofu(&lower, ""); - if unidecoded != lower && !unidecoded.is_empty() { - let token = Token { - word: &unidecoded, - ..token - }; - - match token_to_docindex(id, attr, token) { - Some(docindex) => { - let word = Vec::from(token.word); + if !lower.contains(is_cjk) { + let unidecoded = deunicode_with_tofu(&lower, ""); + if unidecoded != lower && !unidecoded.is_empty() { + let word = Vec::from(unidecoded); words_doc_indexes .entry(word.clone()) .or_insert_with(Vec::new) .push(docindex); docs_words.entry(id).or_insert_with(Vec::new).push(word); } - None => return false, } } + None => return false, } }