From 500eeca3fb0182c1c1ca3b5a784f37d2f83c8012 Mon Sep 17 00:00:00 2001 From: Quentin de Quelen Date: Thu, 2 Apr 2020 19:53:51 +0200 Subject: [PATCH] Rework query highlight/crop parameters --- meilisearch-http/src/helpers/meilisearch.rs | 80 +++++++++++++------- meilisearch-http/src/routes/search.rs | 82 +++++++++++++-------- 2 files changed, 104 insertions(+), 58 deletions(-) diff --git a/meilisearch-http/src/helpers/meilisearch.rs b/meilisearch-http/src/helpers/meilisearch.rs index 6cb1f97a3..9dba1696d 100644 --- a/meilisearch-http/src/helpers/meilisearch.rs +++ b/meilisearch-http/src/helpers/meilisearch.rs @@ -12,8 +12,8 @@ use meilisearch_core::Filter; use meilisearch_core::criterion::*; use meilisearch_core::settings::RankingRule; use meilisearch_core::{Highlight, Index, MainT, RankedMap}; -use meilisearch_tokenizer::is_cjk; use meilisearch_schema::{FieldId, Schema}; +use meilisearch_tokenizer::is_cjk; use serde::{Deserialize, Serialize}; use serde_json::Value; use siphasher::sip::SipHasher; @@ -220,36 +220,51 @@ impl<'a> SearchBuilder<'a> { } let start = Instant::now(); - let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit)); + let result = + query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit)); let (docs, nb_hits) = result.map_err(|e| Error::SearchDocuments(e.to_string()))?; let time_ms = start.elapsed().as_millis() as usize; + let mut all_attributes: HashSet<&str> = HashSet::new(); + let mut all_formatted: HashSet<&str> = HashSet::new(); + + match &self.attributes_to_retrieve { + Some(to_retrieve) => { + all_attributes.extend(to_retrieve.iter().map(String::as_str)); + + if let Some(to_highlight) = &self.attributes_to_highlight { + all_formatted.extend(to_highlight.iter().map(String::as_str)); + } + + if let Some(to_crop) = &self.attributes_to_crop { + all_formatted.extend(to_crop.keys().map(String::as_str)); + } + + all_attributes.extend(&all_formatted); + }, + None => { + all_attributes.extend(schema.displayed_name()); + // If we specified at least one attribute to highlight or crop then + // all available attributes will be returned in the _formatted field. + if self.attributes_to_highlight.is_some() || self.attributes_to_crop.is_some() { + all_formatted.extend(all_attributes.iter().cloned()); + } + }, + } + let mut hits = Vec::with_capacity(self.limit); for doc in docs { - // retrieve the content of document in kv store - let mut fields: Option> = None; - if let Some(attributes_to_retrieve) = &self.attributes_to_retrieve { - let mut set = HashSet::new(); - for field in attributes_to_retrieve { - set.insert(field.as_str()); - } - fields = Some(set); - } - - let document: IndexMap = self + let mut document: IndexMap = self .index - .document(reader, fields.as_ref(), doc.id) + .document(reader, Some(&all_attributes), doc.id) .map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))? .ok_or(Error::DocumentNotFound(doc.id.0))?; - let has_attributes_to_highlight = self.attributes_to_highlight.is_some(); - let has_attributes_to_crop = self.attributes_to_crop.is_some(); + let mut formatted = document.iter() + .filter(|(key, _)| all_formatted.contains(key.as_str())) + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); - let mut formatted = if has_attributes_to_highlight || has_attributes_to_crop { - document.clone() - } else { - IndexMap::new() - }; let mut matches = doc.highlights.clone(); // Crops fields if needed @@ -258,13 +273,24 @@ impl<'a> SearchBuilder<'a> { } // Transform to readable matches - let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema); - if let Some(attributes_to_highlight) = &self.attributes_to_highlight { + let matches = calculate_matches( + matches.clone(), + self.attributes_to_highlight.clone(), + &schema, + ); formatted = calculate_highlights(&formatted, &matches, attributes_to_highlight); } - let matches_info = if self.matches { Some(matches) } else { None }; + let matches_info = if self.matches { + Some(calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema)) + } else { + None + }; + + if let Some(attributes_to_retrieve) = &self.attributes_to_retrieve { + document.retain(|key, _| attributes_to_retrieve.contains(&key.to_string())) + } let hit = SearchHit { document, @@ -369,7 +395,7 @@ pub struct SearchResult { pub query: String, } -/// returns the start index and the length on the crop. +/// returns the start index and the length on the crop. fn aligned_crop(text: &str, match_index: usize, context: usize) -> (usize, usize) { let is_word_component = |c: &char| c.is_alphanumeric() && !is_cjk(*c); @@ -553,8 +579,8 @@ mod tests { let (start, length) = aligned_crop(&text, 5, 3); let cropped = text.chars().skip(start).take(length).collect::().trim().to_string(); assert_eq!("isのス", cropped); - - // split regular word / CJK word, no space + + // split regular word / CJK word, no space let (start, length) = aligned_crop(&text, 7, 1); let cropped = text.chars().skip(start).take(length).collect::().trim().to_string(); assert_eq!("のス", cropped); diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index 2dffd9356..a3a90391c 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::collections::HashSet; use std::time::Duration; +use log::warn; use meilisearch_core::Index; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; @@ -53,45 +54,64 @@ pub async fn search_with_url_query(ctx: Request) -> SResult { search_builder.limit(limit); } - if let Some(attributes_to_retrieve) = query.attributes_to_retrieve { - for attr in attributes_to_retrieve.split(',') { - search_builder.add_retrievable_field(attr.to_string()); + let available_attributes = schema.displayed_name(); + let mut restricted_attributes: HashSet<&str>; + match &query.attributes_to_retrieve { + Some(attributes_to_retrieve) => { + restricted_attributes = attributes_to_retrieve.split(',').collect(); + restricted_attributes.retain(|attr| available_attributes.contains(attr)); + }, + None => { + restricted_attributes = available_attributes.clone(); } } if let Some(attributes_to_crop) = query.attributes_to_crop { - let crop_length = query.crop_length.unwrap_or(200); - if attributes_to_crop == "*" { - let attributes_to_crop = schema - .displayed_name() - .iter() - .map(|attr| (attr.to_string(), crop_length)) - .collect(); - search_builder.attributes_to_crop(attributes_to_crop); - } else { - let attributes_to_crop = attributes_to_crop - .split(',') - .map(|r| (r.to_string(), crop_length)) - .collect(); - search_builder.attributes_to_crop(attributes_to_crop); + let default_length = query.crop_length.unwrap_or(200); + let mut final_attributes: HashMap = HashMap::new(); + + for attribute in attributes_to_crop.split(',') { + let mut attribute = attribute.split(':'); + let attr = attribute.next(); + let length = attribute.next().and_then(|s| s.parse().ok()).unwrap_or(default_length); + match attr { + Some("*") => { + for attr in &restricted_attributes { + final_attributes.insert(attr.to_string(), length); + } + }, + Some(attr) => { + if available_attributes.contains(attr) { + final_attributes.insert(attr.to_string(), length); + } else { + warn!("The attributes {:?} present in attributesToCrop parameter doesn't exist", attr); + } + }, + None => (), + } } + + search_builder.attributes_to_crop(final_attributes); } - if let Some(attributes_to_highlight) = query.attributes_to_highlight { - let attributes_to_highlight = if attributes_to_highlight == "*" { - schema - .displayed_name() - .iter() - .map(|s| s.to_string()) - .collect() - } else { - attributes_to_highlight - .split(',') - .map(|s| s.to_string()) - .collect() - }; + if let Some(inline_attributes) = query.attributes_to_highlight { + let mut final_attributes: HashSet = HashSet::new(); - search_builder.attributes_to_highlight(attributes_to_highlight); + for attribute in inline_attributes.split(',') { + if attribute == "*" { + for attr in &restricted_attributes { + final_attributes.insert(attr.to_string()); + } + } else { + if available_attributes.contains(attribute) { + final_attributes.insert(attribute.to_string()); + } else { + warn!("The attributes {:?} present in attributesToHighlight parameter doesn't exist", attribute); + } + } + } + + search_builder.attributes_to_highlight(final_attributes); } if let Some(filters) = query.filters {