mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Refactor the crop
This commit is contained in:
parent
9543ab4db6
commit
33e55bd82e
@ -1,5 +1,5 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashSet, VecDeque};
|
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use anyhow::bail;
|
use anyhow::bail;
|
||||||
@ -273,16 +273,13 @@ fn add_crop_to_formatted_options(
|
|||||||
displayed_ids: &BTreeSet<u8>,
|
displayed_ids: &BTreeSet<u8>,
|
||||||
) {
|
) {
|
||||||
for attr in attr_to_crop {
|
for attr in attr_to_crop {
|
||||||
let mut attr_name = attr.clone();
|
let mut split = attr.rsplitn(2, ':');
|
||||||
let mut attr_len = crop_length;
|
let (attr_name, attr_len) = match split.next().zip(split.next()) {
|
||||||
|
|
||||||
let mut split = attr_name.rsplitn(2, ':');
|
|
||||||
attr_name = match split.next().zip(split.next()) {
|
|
||||||
Some((len, name)) => {
|
Some((len, name)) => {
|
||||||
attr_len = len.parse().unwrap_or(crop_length);
|
let crop_len = len.parse::<usize>().unwrap_or(crop_length);
|
||||||
name.to_string()
|
(name, crop_len)
|
||||||
},
|
},
|
||||||
None => attr_name,
|
None => (attr.as_str(), crop_length),
|
||||||
};
|
};
|
||||||
|
|
||||||
if attr_name == "*" {
|
if attr_name == "*" {
|
||||||
@ -452,42 +449,49 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
|
|||||||
|
|
||||||
let tokens: Box<dyn Iterator<Item = (&str, Token)>> = match format_options.crop {
|
let tokens: Box<dyn Iterator<Item = (&str, Token)>> = match format_options.crop {
|
||||||
Some(crop_len) => {
|
Some(crop_len) => {
|
||||||
let mut buffer = VecDeque::new();
|
let mut buffer = Vec::new();
|
||||||
let mut tokens = analyzed.reconstruct().peekable();
|
let mut tokens = analyzed.reconstruct().peekable();
|
||||||
let mut taken_before = 0;
|
|
||||||
while let Some((word, token)) = tokens.next_if(|(_, token)| matcher.matches(token.text()).is_none()) {
|
while let Some((word, token)) = tokens.next_if(|(_, token)| matcher.matches(token.text()).is_none()) {
|
||||||
buffer.push_back((word, token));
|
buffer.push((word, token));
|
||||||
taken_before += word.chars().count();
|
}
|
||||||
while taken_before > crop_len {
|
|
||||||
// Around to the previous word
|
match tokens.next() {
|
||||||
if let Some((word, _)) = buffer.front() {
|
Some(token) => {
|
||||||
if taken_before - word.chars().count() < crop_len {
|
let mut total_len: usize = buffer.iter().map(|(word, _)| word.len()).sum();
|
||||||
break;
|
let before_iter = buffer.into_iter().skip_while(move |(word, _)| {
|
||||||
}
|
total_len -= word.len();
|
||||||
}
|
let take = total_len >= crop_len;
|
||||||
if let Some((word, _)) = buffer.pop_front() {
|
take
|
||||||
taken_before -= word.chars().count();
|
});
|
||||||
}
|
|
||||||
|
let mut taken_after = 0;
|
||||||
|
let after_iter = tokens
|
||||||
|
.take_while(move |(word, _)| {
|
||||||
|
let take = taken_after < crop_len;
|
||||||
|
taken_after += word.chars().count();
|
||||||
|
take
|
||||||
|
});
|
||||||
|
|
||||||
|
let iter = before_iter
|
||||||
|
.chain(Some(token))
|
||||||
|
.chain(after_iter);
|
||||||
|
|
||||||
|
Box::new(iter)
|
||||||
|
|
||||||
|
},
|
||||||
|
// If no word matches in the attribute
|
||||||
|
None => {
|
||||||
|
let mut count = 0;
|
||||||
|
let iter = buffer.into_iter().take_while(move |(word, _)| {
|
||||||
|
let take = count < crop_len;
|
||||||
|
count += word.len();
|
||||||
|
take
|
||||||
|
});
|
||||||
|
|
||||||
|
Box::new(iter)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(token) = tokens.next() {
|
|
||||||
buffer.push_back(token);
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut taken_after = 0;
|
|
||||||
let after_iter = tokens
|
|
||||||
.take_while(move |(word, _)| {
|
|
||||||
let take = taken_after < crop_len;
|
|
||||||
taken_after += word.chars().count();
|
|
||||||
take
|
|
||||||
});
|
|
||||||
|
|
||||||
let iter = buffer
|
|
||||||
.into_iter()
|
|
||||||
.chain(after_iter);
|
|
||||||
|
|
||||||
Box::new(iter)
|
|
||||||
}
|
}
|
||||||
None => Box::new(analyzed.reconstruct()),
|
None => Box::new(analyzed.reconstruct()),
|
||||||
};
|
};
|
||||||
@ -757,6 +761,48 @@ mod test {
|
|||||||
assert_eq!(value["author"], "J. K. Rowling");
|
assert_eq!(value["author"], "J. K. Rowling");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn formatted_with_crop_and_no_match() {
|
||||||
|
let stop_words = fst::Set::default();
|
||||||
|
let formatter =
|
||||||
|
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
|
||||||
|
|
||||||
|
let mut fields = FieldsIdsMap::new();
|
||||||
|
let title = fields.insert("title").unwrap();
|
||||||
|
let author = fields.insert("author").unwrap();
|
||||||
|
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
let mut obkv = obkv::KvWriter::new(&mut buf);
|
||||||
|
obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes())
|
||||||
|
.unwrap();
|
||||||
|
obkv.finish().unwrap();
|
||||||
|
obkv = obkv::KvWriter::new(&mut buf);
|
||||||
|
obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes())
|
||||||
|
.unwrap();
|
||||||
|
obkv.finish().unwrap();
|
||||||
|
|
||||||
|
let obkv = obkv::KvReader::new(&buf);
|
||||||
|
|
||||||
|
let mut formatted_options = BTreeMap::new();
|
||||||
|
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(6) });
|
||||||
|
formatted_options.insert(author, FormatOptions { highlight: false, crop: Some(20) });
|
||||||
|
|
||||||
|
let mut matching_words = BTreeMap::new();
|
||||||
|
matching_words.insert("rowling", Some(3));
|
||||||
|
|
||||||
|
let value = format_fields(
|
||||||
|
&fields,
|
||||||
|
obkv,
|
||||||
|
&formatter,
|
||||||
|
&matching_words,
|
||||||
|
&formatted_options,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(value["title"], "Harry ");
|
||||||
|
assert_eq!(value["author"], "J. K. Rowling");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn formatted_with_crop_and_highlight() {
|
fn formatted_with_crop_and_highlight() {
|
||||||
let stop_words = fst::Set::default();
|
let stop_words = fst::Set::default();
|
||||||
|
Loading…
Reference in New Issue
Block a user