diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 783c3d3e0..fbc47cd4e 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,13 +1,12 @@ use std::borrow::Cow; -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::time::Instant; use anyhow::bail; use either::Either; use heed::RoTxn; use indexmap::IndexMap; -use itertools::Itertools; -use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; +use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords}; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -17,11 +16,15 @@ use super::Index; pub type Document = IndexMap; pub const DEFAULT_SEARCH_LIMIT: usize = 20; - const fn default_search_limit() -> usize { DEFAULT_SEARCH_LIMIT } +pub const DEFAULT_CROP_LENGTH: usize = 200; +const fn default_crop_length() -> usize { + DEFAULT_CROP_LENGTH +} + #[derive(Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchQuery { @@ -29,9 +32,10 @@ pub struct SearchQuery { pub offset: Option, #[serde(default = "default_search_limit")] pub limit: usize, - pub attributes_to_retrieve: Option>, - pub attributes_to_crop: Option>, - pub crop_length: Option, + pub attributes_to_retrieve: Option>, + pub attributes_to_crop: Option>, + #[serde(default = "default_crop_length")] + pub crop_length: usize, pub attributes_to_highlight: Option>, pub matches: Option, pub filter: Option, @@ -60,6 +64,12 @@ pub struct SearchResult { pub facet_distributions: Option>>, } +#[derive(Copy, Clone)] +struct FormatOptions { + highlight: bool, + crop: Option, +} + impl Index { pub fn perform_search(&self, query: SearchQuery) -> anyhow::Result { let before_search = Instant::now(); @@ -75,7 +85,7 @@ impl Index { search.offset(query.offset.unwrap_or_default()); if let Some(ref filter) = query.filter { - if let Some(facets) = parse_facets(filter, self, &rtxn)? { + if let Some(facets) = parse_filter(filter, self, &rtxn)? { search.filter(facets); } } @@ -91,11 +101,11 @@ impl Index { let displayed_ids = self .displayed_fields_ids(&rtxn)? - .map(|fields| fields.into_iter().collect::>()) + .map(|fields| fields.into_iter().collect::>()) .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); - let fids = |attrs: &HashSet| { - let mut ids = HashSet::new(); + let fids = |attrs: &BTreeSet| { + let mut ids = BTreeSet::new(); for attr in attrs { if attr == "*" { ids = displayed_ids.clone(); @@ -109,65 +119,53 @@ impl Index { ids }; - let to_retrieve_ids = query + // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), + // but these attributes must be also be present + // - in the fields_ids_map + // - in the the displayed attributes + let to_retrieve_ids: BTreeSet<_> = query .attributes_to_retrieve .as_ref() .map(fids) - .unwrap_or_else(|| displayed_ids.clone()); - - let to_highlight_ids = query - .attributes_to_highlight - .as_ref() - .map(fids) - .unwrap_or_default(); - - let to_crop_ids = query - .attributes_to_crop - .as_ref() - .map(fids) - .unwrap_or_default(); - - // The attributes to retrieve are: - // - the ones explicitly marked as to retrieve that are also in the displayed attributes - let all_attributes: Vec<_> = to_retrieve_ids + .unwrap_or_else(|| displayed_ids.clone()) .intersection(&displayed_ids) .cloned() - .sorted() .collect(); - // The formatted attributes are: - // - The one in either highlighted attributes or cropped attributes if there are attributes - // to retrieve - // - All the attributes to retrieve if there are either highlighted or cropped attributes - // the request specified that all attributes are to retrieve (i.e attributes to retrieve is - // empty in the query) - let all_formatted = if query.attributes_to_retrieve.is_none() { - if query.attributes_to_highlight.is_some() || query.attributes_to_crop.is_some() { - Cow::Borrowed(&all_attributes) - } else { - Cow::Owned(Vec::new()) - } - } else { - let attrs = (&to_crop_ids | &to_highlight_ids) - .intersection(&displayed_ids) - .cloned() - .collect::>(); - Cow::Owned(attrs) - }; + let attr_to_highlight = query + .attributes_to_highlight + .unwrap_or_default(); + + let attr_to_crop = query + .attributes_to_crop + .unwrap_or_default(); + + // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted` + // These attributes are: + // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) + // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped + // But these attributes must be also present in displayed attributes + let formatted_options = compute_formatted_options( + &attr_to_highlight, + &attr_to_crop, + query.crop_length, + &to_retrieve_ids, + &fields_ids_map, + &displayed_ids, + ); let stop_words = fst::Set::default(); - let highlighter = - Highlighter::new(&stop_words, (String::from(""), String::from(""))); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); for (_id, obkv) in self.documents(&rtxn, documents_ids)? { - let document = make_document(&all_attributes, &fields_ids_map, obkv)?; - let formatted = compute_formatted( + let document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?; + let formatted = format_fields( &fields_ids_map, obkv, - &highlighter, + &formatter, &matching_words, - all_formatted.as_ref().as_slice(), - &to_highlight_ids, + &formatted_options, )?; let hit = SearchHit { document, @@ -203,8 +201,129 @@ impl Index { } } +fn compute_formatted_options( + attr_to_highlight: &HashSet, + attr_to_crop: &[String], + query_crop_length: usize, + to_retrieve_ids: &BTreeSet, + fields_ids_map: &FieldsIdsMap, + displayed_ids: &BTreeSet, + ) -> BTreeMap { + + let mut formatted_options = BTreeMap::new(); + + add_highlight_to_formatted_options( + &mut formatted_options, + attr_to_highlight, + fields_ids_map, + displayed_ids, + ); + + add_crop_to_formatted_options( + &mut formatted_options, + attr_to_crop, + query_crop_length, + fields_ids_map, + displayed_ids, + ); + + // Should not return `_formatted` if no valid attributes to highlight/crop + if !formatted_options.is_empty() { + add_non_formatted_ids_to_formatted_options( + &mut formatted_options, + to_retrieve_ids, + ); + } + + formatted_options +} + +fn add_highlight_to_formatted_options( + formatted_options: &mut BTreeMap, + attr_to_highlight: &HashSet, + fields_ids_map: &FieldsIdsMap, + displayed_ids: &BTreeSet, +) { + for attr in attr_to_highlight { + let new_format = FormatOptions { + highlight: true, + crop: None, + }; + + if attr == "*" { + for id in displayed_ids { + formatted_options.insert(*id, new_format); + } + break; + } + + if let Some(id) = fields_ids_map.id(&attr) { + if displayed_ids.contains(&id) { + formatted_options.insert(id, new_format); + } + } + } +} + +fn add_crop_to_formatted_options( + formatted_options: &mut BTreeMap, + attr_to_crop: &[String], + crop_length: usize, + fields_ids_map: &FieldsIdsMap, + displayed_ids: &BTreeSet, +) { + for attr in attr_to_crop { + let mut split = attr.rsplitn(2, ':'); + let (attr_name, attr_len) = match split.next().zip(split.next()) { + Some((len, name)) => { + let crop_len = len.parse::().unwrap_or(crop_length); + (name, crop_len) + }, + None => (attr.as_str(), crop_length), + }; + + if attr_name == "*" { + for id in displayed_ids { + formatted_options + .entry(*id) + .and_modify(|f| f.crop = Some(attr_len)) + .or_insert(FormatOptions { + highlight: false, + crop: Some(attr_len), + }); + } + } + + if let Some(id) = fields_ids_map.id(&attr_name) { + if displayed_ids.contains(&id) { + formatted_options + .entry(id) + .and_modify(|f| f.crop = Some(attr_len)) + .or_insert(FormatOptions { + highlight: false, + crop: Some(attr_len), + }); + } + } + } +} + +fn add_non_formatted_ids_to_formatted_options( + formatted_options: &mut BTreeMap, + to_retrieve_ids: &BTreeSet +) { + for id in to_retrieve_ids { + formatted_options + .entry(*id) + .or_insert(FormatOptions { + highlight: false, + crop: None, + }); + } +} + fn make_document( - attributes_to_retrieve: &[FieldId], + attributes_to_retrieve: &BTreeSet, field_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, ) -> anyhow::Result { @@ -226,28 +345,29 @@ fn make_document( Ok(document) } -fn compute_formatted>( +fn format_fields>( field_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, - highlighter: &Highlighter, + formatter: &Formatter, matching_words: &impl Matcher, - all_formatted: &[FieldId], - to_highlight_ids: &HashSet, + formatted_options: &BTreeMap, ) -> anyhow::Result { let mut document = Document::new(); - for field in all_formatted { - if let Some(value) = obkv.get(*field) { + for (id, format) in formatted_options { + if let Some(value) = obkv.get(*id) { let mut value: Value = serde_json::from_slice(value)?; - if to_highlight_ids.contains(field) { - value = highlighter.highlight_value(value, matching_words); - } + value = formatter.format_value( + value, + matching_words, + *format, + ); // This unwrap must be safe since we got the ids from the fields_ids_map just // before. let key = field_ids_map - .name(*field) + .name(*id) .expect("Missing field name") .to_string(); @@ -258,30 +378,30 @@ fn compute_formatted>( Ok(document) } -/// trait to allow unit testing of `compute_formated` +/// trait to allow unit testing of `format_fields` trait Matcher { - fn matches(&self, w: &str) -> bool; + fn matches(&self, w: &str) -> Option; } #[cfg(test)] -impl Matcher for HashSet { - fn matches(&self, w: &str) -> bool { - self.contains(w) +impl Matcher for BTreeMap<&str, Option> { + fn matches(&self, w: &str) -> Option { + self.get(w).cloned().flatten() } } impl Matcher for MatchingWords { - fn matches(&self, w: &str) -> bool { - self.matching_bytes(w).is_some() + fn matches(&self, w: &str) -> Option { + self.matching_bytes(w) } } -struct Highlighter<'a, A> { +struct Formatter<'a, A> { analyzer: Analyzer<'a, A>, marks: (String, String), } -impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { +impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { pub fn new(stop_words: &'a fst::Set, marks: (String, String)) -> Self { let mut config = AnalyzerConfig::default(); config.stop_words(stop_words); @@ -291,59 +411,121 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { Self { analyzer, marks } } - fn highlight_value(&self, value: Value, words_to_highlight: &impl Matcher) -> Value { + fn format_value( + &self, + value: Value, + matcher: &impl Matcher, + format_options: FormatOptions, + ) -> Value { match value { - Value::Null => Value::Null, - Value::Bool(boolean) => Value::Bool(boolean), - Value::Number(number) => Value::Number(number), Value::String(old_string) => { - let mut string = String::new(); - let analyzed = self.analyzer.analyze(&old_string); - for (word, token) in analyzed.reconstruct() { - if token.is_word() { - let to_highlight = words_to_highlight.matches(token.text()); - if to_highlight { - string.push_str(&self.marks.0) - } - string.push_str(word); - if to_highlight { - string.push_str(&self.marks.1) - } - } else { - string.push_str(word); - } - } - Value::String(string) + let value = + self.format_string(old_string, matcher, format_options); + Value::String(value) } Value::Array(values) => Value::Array( values .into_iter() - .map(|v| self.highlight_value(v, words_to_highlight)) + .map(|v| self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None })) .collect(), ), Value::Object(object) => Value::Object( object .into_iter() - .map(|(k, v)| (k, self.highlight_value(v, words_to_highlight))) + .map(|(k, v)| (k, self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None }))) .collect(), ), + value => value, } } + + fn format_string( + &self, + s: String, + matcher: &impl Matcher, + format_options: FormatOptions, + ) -> String { + let analyzed = self.analyzer.analyze(&s); + + let tokens: Box> = match format_options.crop { + Some(crop_len) => { + let mut buffer = Vec::new(); + let mut tokens = analyzed.reconstruct().peekable(); + + while let Some((word, token)) = tokens.next_if(|(_, token)| matcher.matches(token.text()).is_none()) { + buffer.push((word, token)); + } + + match tokens.next() { + Some(token) => { + let mut total_len: usize = buffer.iter().map(|(word, _)| word.len()).sum(); + let before_iter = buffer.into_iter().skip_while(move |(word, _)| { + total_len -= word.len(); + total_len >= crop_len + }); + + let mut taken_after = 0; + let after_iter = tokens + .take_while(move |(word, _)| { + let take = taken_after < crop_len; + taken_after += word.chars().count(); + take + }); + + let iter = before_iter + .chain(Some(token)) + .chain(after_iter); + + Box::new(iter) + }, + // If no word matches in the attribute + None => { + let mut count = 0; + let iter = buffer.into_iter().take_while(move |(word, _)| { + let take = count < crop_len; + count += word.len(); + take + }); + + Box::new(iter) + } + } + } + None => Box::new(analyzed.reconstruct()), + }; + + tokens + .map(|(word, token)| { + if format_options.highlight && token.is_word() && matcher.matches(token.text()).is_some() { + let mut new_word = String::new(); + new_word.push_str(&self.marks.0); + if let Some(match_len) = matcher.matches(token.text()) { + new_word.push_str(&word[..match_len]); + new_word.push_str(&self.marks.1); + new_word.push_str(&word[match_len..]); + } + Cow::Owned(new_word) + } else { + Cow::Borrowed(word) + } + }) + .collect::() + } } -fn parse_facets( +fn parse_filter( facets: &Value, index: &Index, txn: &RoTxn, ) -> anyhow::Result> { match facets { Value::String(expr) => Ok(Some(FilterCondition::from_str(txn, index, expr)?)), - Value::Array(arr) => parse_facets_array(txn, index, arr), + Value::Array(arr) => parse_filter_array(txn, index, arr), v => bail!("Invalid facet expression, expected Array, found: {:?}", v), } } -fn parse_facets_array( +fn parse_filter_array( txn: &RoTxn, index: &Index, arr: &[Value], @@ -374,15 +556,13 @@ fn parse_facets_array( #[cfg(test)] mod test { - use std::iter::FromIterator; - use super::*; #[test] - fn no_formatted() { + fn no_ids_no_formatted() { let stop_words = fst::Set::default(); - let highlighter = - Highlighter::new(&stop_words, (String::from(""), String::from(""))); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); let id = fields.insert("test").unwrap(); @@ -395,18 +575,16 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let all_formatted = Vec::new(); - let to_highlight_ids = HashSet::new(); + let formatted_options = BTreeMap::new(); let matching_words = MatchingWords::default(); - let value = compute_formatted( + let value = format_fields( &fields, obkv, - &highlighter, + &formatter, &matching_words, - &all_formatted, - &to_highlight_ids, + &formatted_options, ) .unwrap(); @@ -414,72 +592,296 @@ mod test { } #[test] - fn formatted_no_highlight() { + fn formatted_with_highlight_in_word() { let stop_words = fst::Set::default(); - let highlighter = - Highlighter::new(&stop_words, (String::from(""), String::from(""))); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); - let id = fields.insert("test").unwrap(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); let mut buf = Vec::new(); let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()) + obkv.insert(title, Value::String("The Hobbit".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. R. R. Tolkien".into()).to_string().as_bytes()) .unwrap(); obkv.finish().unwrap(); let obkv = obkv::KvReader::new(&buf); - let all_formatted = vec![id]; - let to_highlight_ids = HashSet::new(); + let mut formatted_options = BTreeMap::new(); + formatted_options.insert(title, FormatOptions { highlight: true, crop: None }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); - let matching_words = MatchingWords::default(); + let mut matching_words = BTreeMap::new(); + matching_words.insert("hobbit", Some(3)); - let value = compute_formatted( + let value = format_fields( &fields, obkv, - &highlighter, + &formatter, &matching_words, - &all_formatted, - &to_highlight_ids, + &formatted_options, ) .unwrap(); - assert_eq!(value["test"], "hello"); + assert_eq!(value["title"], "The Hobbit"); + assert_eq!(value["author"], "J. R. R. Tolkien"); } #[test] - fn formatted_with_highlight() { + fn formatted_with_crop_2() { let stop_words = fst::Set::default(); - let highlighter = - Highlighter::new(&stop_words, (String::from(""), String::from(""))); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); - let id = fields.insert("test").unwrap(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); let mut buf = Vec::new(); let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()) + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) .unwrap(); obkv.finish().unwrap(); let obkv = obkv::KvReader::new(&buf); - let all_formatted = vec![id]; - let to_highlight_ids = HashSet::from_iter(Some(id)); + let mut formatted_options = BTreeMap::new(); + formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(2) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); - let matching_words = HashSet::from_iter(Some(String::from("hello"))); + let mut matching_words = BTreeMap::new(); + matching_words.insert("potter", Some(6)); - let value = compute_formatted( + let value = format_fields( &fields, obkv, - &highlighter, + &formatter, &matching_words, - &all_formatted, - &to_highlight_ids, + &formatted_options, ) .unwrap(); - assert_eq!(value["test"], "hello"); + assert_eq!(value["title"], "Harry Potter and"); + assert_eq!(value["author"], "J. K. Rowling"); + } + + #[test] + fn formatted_with_crop_10() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let mut formatted_options = BTreeMap::new(); + formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(10) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); + + let mut matching_words = BTreeMap::new(); + matching_words.insert("potter", Some(6)); + + let value = format_fields( + &fields, + obkv, + &formatter, + &matching_words, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "Harry Potter and the Half"); + assert_eq!(value["author"], "J. K. Rowling"); + } + + #[test] + fn formatted_with_crop_0() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let mut formatted_options = BTreeMap::new(); + formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(0) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); + + let mut matching_words = BTreeMap::new(); + matching_words.insert("potter", Some(6)); + + let value = format_fields( + &fields, + obkv, + &formatter, + &matching_words, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "Potter"); + assert_eq!(value["author"], "J. K. Rowling"); + } + + #[test] + fn formatted_with_crop_and_no_match() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let mut formatted_options = BTreeMap::new(); + formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(6) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: Some(20) }); + + let mut matching_words = BTreeMap::new(); + matching_words.insert("rowling", Some(3)); + + let value = format_fields( + &fields, + obkv, + &formatter, + &matching_words, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "Harry "); + assert_eq!(value["author"], "J. K. Rowling"); + } + + #[test] + fn formatted_with_crop_and_highlight() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let mut formatted_options = BTreeMap::new(); + formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(1) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); + + let mut matching_words = BTreeMap::new(); + matching_words.insert("and", Some(3)); + + let value = format_fields( + &fields, + obkv, + &formatter, + &matching_words, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], " and "); + assert_eq!(value["author"], "J. K. Rowling"); + } + + #[test] + fn formatted_with_crop_and_highlight_in_word() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let mut formatted_options = BTreeMap::new(); + formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(9) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); + + let mut matching_words = BTreeMap::new(); + matching_words.insert("blood", Some(3)); + + let value = format_fields( + &fields, + obkv, + &formatter, + &matching_words, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "the Half-Blood Prince"); + assert_eq!(value["author"], "J. K. Rowling"); } } diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 377b2c333..e7304d56c 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -8,6 +8,7 @@ use crate::index_controller::{Failed, IndexStats, Processed, Processing}; use super::{IndexMeta, IndexResult, IndexSettings}; +#[allow(clippy::large_enum_variant)] pub enum IndexMsg { CreateIndex { uuid: Uuid, diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index be06960cf..36f5bdf4d 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{BTreeSet, HashSet}; use std::convert::{TryFrom, TryInto}; use actix_web::{get, post, web, HttpResponse}; @@ -23,7 +23,7 @@ pub struct SearchQueryGet { limit: Option, attributes_to_retrieve: Option, attributes_to_crop: Option, - crop_length: Option, + crop_length: usize, attributes_to_highlight: Option, filter: Option, matches: Option, @@ -36,11 +36,11 @@ impl TryFrom for SearchQuery { fn try_from(other: SearchQueryGet) -> anyhow::Result { let attributes_to_retrieve = other .attributes_to_retrieve - .map(|attrs| attrs.split(',').map(String::from).collect::>()); + .map(|attrs| attrs.split(',').map(String::from).collect::>()); let attributes_to_crop = other .attributes_to_crop - .map(|attrs| attrs.split(',').map(String::from).collect::>()); + .map(|attrs| attrs.split(',').map(String::from).collect::>()); let attributes_to_highlight = other .attributes_to_highlight