simple crop before

This commit is contained in:
Marin Postma 2021-05-11 17:27:31 +02:00 committed by Clémentine Urquizar
parent 93002e734c
commit 56c9633c53
No known key found for this signature in database
GPG Key ID: D8E7CC7422E77E1A

View File

@ -7,7 +7,7 @@ use either::Either;
use heed::RoTxn; use heed::RoTxn;
use indexmap::IndexMap; use indexmap::IndexMap;
use itertools::Itertools; use itertools::Itertools;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords}; use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
@ -303,7 +303,7 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
&self, &self,
value: Value, value: Value,
matcher: &impl Matcher, matcher: &impl Matcher,
need_to_crop: Option<u32>, need_to_crop: Option<usize>,
need_to_highlight: bool, need_to_highlight: bool,
) -> Value { ) -> Value {
match value { match value {
@ -326,30 +326,34 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
value => value, value => value,
} }
} }
fn format_string(&self, s: String, matcher: &impl Matcher, need_to_crop: Option<u32>, need_to_highlight: bool) -> String { fn format_string(&self, s: String, matcher: &impl Matcher, need_to_crop: Option<usize>, need_to_highlight: bool) -> String {
let analyzed = self.analyzer.analyze(&s); let analyzed = self.analyzer.analyze(&s);
let word_iter: Box<dyn Iterator<Item = (String, bool)>> = if let Some(_crop_len) = need_to_crop {
// cropping iterator let tokens: Box<dyn Iterator<Item=(&str, Token)>> = match need_to_crop {
todo!() Some(crop_len) => {
} else { let mut taken = 0;
Box::new(analyzed.reconstruct().map(|(word, token)| { let iter = analyzed
if token.is_word() && matcher.matches(token.text()){ .reconstruct()
(word.to_string(), true) .skip_while(|(_, token)| !matcher.matches(token.text()))
} else { .take_while(move |(word, _)| {
(word.to_string(), false) let take = taken < crop_len;
} taken += word.chars().count();
})) take
});
Box::new(iter)
},
None => Box::new(analyzed.reconstruct()),
}; };
word_iter.map(|(word, is_match)| { tokens.map(|(word, token)| {
if need_to_highlight && is_match { if need_to_highlight && token.is_word() && matcher.matches(token.text()){
let mut new_word = String::new(); let mut new_word = String::new();
new_word.push_str(&self.marks.0); new_word.push_str(&self.marks.0);
new_word.push_str(&word); new_word.push_str(&word);
new_word.push_str(&self.marks.1); new_word.push_str(&self.marks.1);
new_word new_word
} else { } else {
word word.to_string()
} }
}) })
.collect::<String>() .collect::<String>()