From ce789682ccf7159023cf3eeb1a86c20ab821ecb0 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 12 May 2020 16:49:13 +0200 Subject: [PATCH 1/4] remove unnecessary clone --- meilisearch-http/src/helpers/meilisearch.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch-http/src/helpers/meilisearch.rs b/meilisearch-http/src/helpers/meilisearch.rs index 10fdd9c21..d9d7b3f17 100644 --- a/meilisearch-http/src/helpers/meilisearch.rs +++ b/meilisearch-http/src/helpers/meilisearch.rs @@ -214,7 +214,7 @@ impl<'a> SearchBuilder<'a> { // Transform to readable matches if let Some(attributes_to_highlight) = &self.attributes_to_highlight { let matches = calculate_matches( - matches.clone(), + &matches, self.attributes_to_highlight.clone(), &schema, ); @@ -222,7 +222,7 @@ impl<'a> SearchBuilder<'a> { } let matches_info = if self.matches { - Some(calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema)) + Some(calculate_matches(&matches, self.attributes_to_retrieve.clone(), &schema)) } else { None }; @@ -417,7 +417,7 @@ fn crop_document( } fn calculate_matches( - matches: Vec, + matches: &[Highlight], attributes_to_retrieve: Option>, schema: &Schema, ) -> MatchesInfos { @@ -544,7 +544,7 @@ mod tests { let schema = Schema::with_primary_key("title"); - let matches_result = super::calculate_matches(matches, Some(attributes_to_retrieve), &schema); + let matches_result = super::calculate_matches(&matches, Some(attributes_to_retrieve), &schema); let mut matches_result_expected: HashMap> = HashMap::new(); From a94ee167fc07c510610eca4e3548d67ecab021a9 Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 13 May 2020 15:57:58 +0200 Subject: [PATCH 2/4] fix unaligned highlight --- meilisearch-http/src/helpers/meilisearch.rs | 61 ++++++++++++--------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/meilisearch-http/src/helpers/meilisearch.rs b/meilisearch-http/src/helpers/meilisearch.rs index d9d7b3f17..ccd6a1977 100644 --- a/meilisearch-http/src/helpers/meilisearch.rs +++ b/meilisearch-http/src/helpers/meilisearch.rs @@ -352,10 +352,14 @@ fn aligned_crop(text: &str, match_index: usize, context: usize) -> (usize, usize return (match_index, 1 + text.chars().skip(match_index).take_while(is_word_component).count()); } let start = match match_index.saturating_sub(context) { - n if n == 0 => n, - n => word_end_index(n) + 0 => 0, + n => { + let word_end_index = word_end_index(n); + // skip whitespaces if any + word_end_index + text.chars().skip(word_end_index).take_while(char::is_ascii_whitespace).count() + } }; - let end = word_end_index(start + 2 * context); + let end = word_end_index(match_index + context); (start, end - start) } @@ -370,15 +374,21 @@ fn crop_text( let char_index = matches.peek().map(|m| m.char_index as usize).unwrap_or(0); let (start, count) = aligned_crop(text, char_index, context); - //TODO do something about the double allocation - let text = text.chars().skip(start).take(count).collect::().trim().to_string(); + // TODO do something about double allocation + let text = text + .chars() + .skip(start) + .take(count) + .collect::() + .trim() + .to_string(); // update matches index to match the new cropped text let matches = matches - .take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)) - .map(|match_| Highlight { - char_index: match_.char_index - start as u16, - ..match_ + .take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + count) + .map(|m| Highlight { + char_index: m.char_index - start as u16, + ..m }) .collect(); @@ -424,7 +434,7 @@ fn calculate_matches( let mut matches_result: HashMap> = HashMap::new(); for m in matches.iter() { if let Some(attribute) = schema.name(FieldId::new(m.attribute)) { - if let Some(attributes_to_retrieve) = attributes_to_retrieve.clone() { + if let Some(ref attributes_to_retrieve) = attributes_to_retrieve { if !attributes_to_retrieve.contains(attribute) { continue; } @@ -470,21 +480,20 @@ fn calculate_highlights( let longest_matches = matches .linear_group_by_key(|m| m.start) - .map(|group| group.last().unwrap()); + .map(|group| group.last().unwrap()) + .filter(move |m| m.start >= index); for m in longest_matches { - if m.start >= index { - let before = value.get(index..m.start); - let highlighted = value.get(m.start..(m.start + m.length)); - if let (Some(before), Some(highlighted)) = (before, highlighted) { - highlighted_value.extend(before); - highlighted_value.push_str(""); - highlighted_value.extend(highlighted); - highlighted_value.push_str(""); - index = m.start + m.length; - } else { - error!("value: {:?}; index: {:?}, match: {:?}", value, index, m); - } + let before = value.get(index..m.start); + let highlighted = value.get(m.start..(m.start + m.length)); + if let (Some(before), Some(highlighted)) = (before, highlighted) { + highlighted_value.extend(before); + highlighted_value.push_str(""); + highlighted_value.extend(highlighted); + highlighted_value.push_str(""); + index = m.start + m.length; + } else { + error!("value: {:?}; index: {:?}, match: {:?}", value, index, m); } } highlighted_value.extend(value[index..].iter()); @@ -492,7 +501,6 @@ fn calculate_highlights( }; } } - highlight_result } @@ -524,13 +532,12 @@ mod tests { // mixed charset let (start, length) = aligned_crop(&text, 5, 3); let cropped = text.chars().skip(start).take(length).collect::().trim().to_string(); - assert_eq!("isのス", cropped); + assert_eq!("isの", cropped); // split regular word / CJK word, no space let (start, length) = aligned_crop(&text, 7, 1); let cropped = text.chars().skip(start).take(length).collect::().trim().to_string(); - assert_eq!("のス", cropped); - + assert_eq!("の", cropped); } #[test] From 54707e4e2487152b76d66238077c0ab3c0b6e400 Mon Sep 17 00:00:00 2001 From: mpostma Date: Thu, 14 May 2020 11:03:24 +0200 Subject: [PATCH 3/4] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a30895e79..ad5646171 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## v0.10.2 - Disable sentry in debug (#681) + - Fix highlight misalignment (#679) - Add support for facet count (#676) - Add support for faceted search (#631) - Add support for configuring the lmdb map size (#646, #647) From 9c1de3adfc82d1817579aeecd46e1d95d7ef183e Mon Sep 17 00:00:00 2001 From: mpostma Date: Thu, 14 May 2020 12:50:59 +0200 Subject: [PATCH 4/4] add tests --- meilisearch-http/tests/search.rs | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/meilisearch-http/tests/search.rs b/meilisearch-http/tests/search.rs index 58a66cd39..c4c3b0a30 100644 --- a/meilisearch-http/tests/search.rs +++ b/meilisearch-http/tests/search.rs @@ -1343,3 +1343,67 @@ async fn test_facet_count() { let (_response, status_code) = server.search(query).await; assert_eq!(status_code, 400); } + +#[actix_rt::test] +async fn highlight_cropped_text() { + let mut server = common::Server::with_uid("test"); + + let body = json!({ + "uid": "test", + "primaryKey": "id", + }); + server.create_index(body).await; + + let doc = json!([ + { + "id": 1, + "body": r##"well, it may not work like that, try the following: +1. insert your trip +2. google your `searchQuery` +3. find a solution +> say hello"## + } + ]); + server.add_or_replace_multiple_documents(doc).await; + + // tests from #680 + let query = "q=insert&attributesToHighlight=*&attributesToCrop=body&cropLength=30"; + let (response, _status_code) = server.search(query).await; + let expected_response = "that, try the following: \n1. insert your trip\n2. google your"; + assert_eq!(response + .get("hits") + .unwrap() + .as_array() + .unwrap() + .get(0) + .unwrap() + .as_object() + .unwrap() + .get("_formatted") + .unwrap() + .as_object() + .unwrap() + .get("body") + .unwrap() + , &Value::String(expected_response.to_owned())); + + let query = "q=insert&attributesToHighlight=*&attributesToCrop=body&cropLength=80"; + let (response, _status_code) = server.search(query).await; + let expected_response = "well, it may not work like that, try the following: \n1. insert your trip\n2. google your `searchQuery`\n3. find a solution \n> say hello"; + assert_eq!(response + .get("hits") + .unwrap() + .as_array() + .unwrap() + .get(0) + .unwrap() + .as_object() + .unwrap() + .get("_formatted") + .unwrap() + .as_object() + .unwrap() + .get("body") + .unwrap() + , &Value::String(expected_response.to_owned())); +}