mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 10:07:40 +08:00
Compare commits
8 Commits
673b949b7a
...
af8279a71e
Author | SHA1 | Date | |
---|---|---|---|
|
af8279a71e | ||
|
057fcb3993 | ||
|
510ca99996 | ||
|
8924d486db | ||
|
e0c3f3d560 | ||
|
cd796b0f4b | ||
|
72ba353498 | ||
|
0dd321afc7 |
@ -1733,7 +1733,10 @@ fn format_fields(
|
||||
// select the attributes to retrieve
|
||||
let displayable_names =
|
||||
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
|
||||
permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| {
|
||||
permissive_json_pointer::map_leaf_values(
|
||||
&mut document,
|
||||
displayable_names,
|
||||
|key, array_indices, value| {
|
||||
// To get the formatting option of each key we need to see all the rules that applies
|
||||
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
|
||||
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
|
||||
@ -1764,6 +1767,7 @@ fn format_fields(
|
||||
format,
|
||||
&mut infos,
|
||||
compute_matches,
|
||||
array_indices,
|
||||
locales,
|
||||
);
|
||||
|
||||
@ -1772,7 +1776,8 @@ fn format_fields(
|
||||
matches.insert(key.to_owned(), infos);
|
||||
}
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
let selectors = formatted_options
|
||||
.keys()
|
||||
@ -1790,13 +1795,14 @@ fn format_value(
|
||||
format_options: Option<FormatOptions>,
|
||||
infos: &mut Vec<MatchBounds>,
|
||||
compute_matches: bool,
|
||||
array_indices: &[usize],
|
||||
locales: Option<&[Language]>,
|
||||
) -> Value {
|
||||
match value {
|
||||
Value::String(old_string) => {
|
||||
let mut matcher = builder.build(&old_string, locales);
|
||||
if compute_matches {
|
||||
let matches = matcher.matches();
|
||||
let matches = matcher.matches(array_indices);
|
||||
infos.extend_from_slice(&matches[..]);
|
||||
}
|
||||
|
||||
@ -1808,51 +1814,15 @@ fn format_value(
|
||||
None => Value::String(old_string),
|
||||
}
|
||||
}
|
||||
Value::Array(values) => Value::Array(
|
||||
values
|
||||
.into_iter()
|
||||
.map(|v| {
|
||||
format_value(
|
||||
v,
|
||||
builder,
|
||||
format_options.map(|format_options| FormatOptions {
|
||||
highlight: format_options.highlight,
|
||||
crop: None,
|
||||
}),
|
||||
infos,
|
||||
compute_matches,
|
||||
locales,
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
),
|
||||
Value::Object(object) => Value::Object(
|
||||
object
|
||||
.into_iter()
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
k,
|
||||
format_value(
|
||||
v,
|
||||
builder,
|
||||
format_options.map(|format_options| FormatOptions {
|
||||
highlight: format_options.highlight,
|
||||
crop: None,
|
||||
}),
|
||||
infos,
|
||||
compute_matches,
|
||||
locales,
|
||||
),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
),
|
||||
// `map_leaf_values` makes sure this is only called for leaf fields
|
||||
Value::Array(_) => unreachable!(),
|
||||
Value::Object(_) => unreachable!(),
|
||||
Value::Number(number) => {
|
||||
let s = number.to_string();
|
||||
|
||||
let mut matcher = builder.build(&s, locales);
|
||||
if compute_matches {
|
||||
let matches = matcher.matches();
|
||||
let matches = matcher.matches(array_indices);
|
||||
infos.extend_from_slice(&matches[..]);
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,58 @@ use super::*;
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_formatted_from_sdk() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
index
|
||||
.update_settings(
|
||||
json!({ "filterableAttributes": ["genre"], "searchableAttributes": ["title"] }),
|
||||
)
|
||||
.await;
|
||||
|
||||
let documents = json!([
|
||||
{ "id": 123, "title": "Pride and Prejudice", "genre": "romance" },
|
||||
{ "id": 456, "title": "Le Petit Prince", "genre": "adventure" },
|
||||
{ "id": 1, "title": "Alice In Wonderland", "genre": "adventure" },
|
||||
{ "id": 2, "title": "Le Rouge et le Noir", "genre": "romance" },
|
||||
{ "id": 1344, "title": "The Hobbit", "genre": "adventure" },
|
||||
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "genre": "fantasy" },
|
||||
{ "id": 7, "title": "Harry Potter and the Chamber of Secrets", "genre": "fantasy" },
|
||||
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy" }
|
||||
]);
|
||||
let (response, _) = index.add_documents(documents, None).await;
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({ "q":"prince",
|
||||
"attributesToCrop": ["title"],
|
||||
"cropLength": 2,
|
||||
"filter": "genre = adventure",
|
||||
"attributesToHighlight": ["title"],
|
||||
"attributesToRetrieve": ["title"]
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"title": "Le Petit Prince",
|
||||
"_formatted": {
|
||||
"title": "…Petit <em>Prince</em>"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn formatted_contain_wildcard() {
|
||||
let server = Server::new_shared();
|
||||
@ -208,7 +260,10 @@ async fn format_nested() {
|
||||
"doggos.name": [
|
||||
{
|
||||
"start": 0,
|
||||
"length": 5
|
||||
"length": 5,
|
||||
"indices": [
|
||||
0
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -15,6 +15,7 @@ mod pagination;
|
||||
mod restrict_searchable;
|
||||
mod search_queue;
|
||||
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch::Opt;
|
||||
use tempfile::TempDir;
|
||||
|
||||
@ -62,6 +63,71 @@ async fn simple_search() {
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_stop_word() {
|
||||
// related to https://github.com/meilisearch/meilisearch/issues/4984
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (_, code) = index
|
||||
.update_settings(json!({"stopWords": ["the", "The", "a", "an", "to", "in", "of"]}))
|
||||
.await;
|
||||
meili_snap::snapshot!(code, @"202 Accepted");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
// prefix search
|
||||
index
|
||||
.search(json!({"q": "to the", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response["hits"]), @"[]");
|
||||
})
|
||||
.await;
|
||||
|
||||
// non-prefix search
|
||||
index
|
||||
.search(json!({"q": "to the ", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"_formatted": {
|
||||
"title": "Shazam!"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"_formatted": {
|
||||
"title": "Captain Marvel"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"_formatted": {
|
||||
"title": "Escape Room"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"_formatted": {
|
||||
"title": "How to Train Your Dragon: The Hidden World"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Gläss",
|
||||
"_formatted": {
|
||||
"title": "Gläss"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn phrase_search_with_stop_word() {
|
||||
// related to https://github.com/meilisearch/meilisearch/issues/3521
|
||||
|
@ -367,3 +367,50 @@ async fn search_on_exact_field() {
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn phrase_search_on_title() {
|
||||
let server = Server::new().await;
|
||||
let documents = json!([
|
||||
{ "id": 8, "desc": "Document Review", "title": "Document Review Specialist II" },
|
||||
{ "id": 5, "desc": "Document Review", "title": "Document Review Attorney" },
|
||||
{ "id": 4, "desc": "Document Review", "title": "Document Review Manager - Cyber Incident Response (Remote)" },
|
||||
{ "id": 3, "desc": "Document Review", "title": "Document Review Paralegal" },
|
||||
{ "id": 2, "desc": "Document Review", "title": "Document Controller (Saudi National)" },
|
||||
{ "id": 1, "desc": "Document Review", "title": "Document Reviewer" },
|
||||
{ "id": 7, "desc": "Document Review", "title": "Document Review Specialist II" },
|
||||
{ "id": 6, "desc": "Document Review", "title": "Document Review (Entry Level)" }
|
||||
]);
|
||||
let index = index_with_documents(&server, &documents).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "\"Document Review\"", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["title"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Document Review Specialist II"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Attorney"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Manager - Cyber Incident Response (Remote)"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Paralegal"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Specialist II"
|
||||
},
|
||||
{
|
||||
"title": "Document Review (Entry Level)"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
@ -105,6 +105,8 @@ impl FormatOptions {
|
||||
pub struct MatchBounds {
|
||||
pub start: usize,
|
||||
pub length: usize,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub indices: Option<Vec<usize>>,
|
||||
}
|
||||
|
||||
/// Structure used to analyze a string, compute words that match,
|
||||
@ -220,15 +222,20 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
||||
}
|
||||
|
||||
/// Returns boundaries of the words that match the query.
|
||||
pub fn matches(&mut self) -> Vec<MatchBounds> {
|
||||
pub fn matches(&mut self, array_indices: &[usize]) -> Vec<MatchBounds> {
|
||||
match &self.matches {
|
||||
None => self.compute_matches().matches(),
|
||||
None => self.compute_matches().matches(array_indices),
|
||||
Some((tokens, matches)) => matches
|
||||
.iter()
|
||||
.map(|m| MatchBounds {
|
||||
start: tokens[m.get_first_token_pos()].byte_start,
|
||||
// TODO: Why is this in chars, while start is in bytes?
|
||||
length: m.char_count,
|
||||
indices: if array_indices.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(array_indices.to_owned())
|
||||
},
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
@ -268,7 +275,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
||||
last_match_last_token_position_plus_one
|
||||
} else {
|
||||
// we have matched the end of possible tokens, there's nothing to advance
|
||||
tokens.len() - 1
|
||||
tokens.len()
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -193,15 +193,23 @@ pub fn compute_phrase_docids(
|
||||
if words.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let mut candidates = None;
|
||||
for word in words.iter().flatten().copied() {
|
||||
if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
|
||||
candidates |= word_docids;
|
||||
if let Some(candidates) = candidates.as_mut() {
|
||||
*candidates &= word_docids;
|
||||
} else {
|
||||
candidates = Some(word_docids);
|
||||
}
|
||||
} else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
}
|
||||
|
||||
let Some(mut candidates) = candidates else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
};
|
||||
|
||||
let winsize = words.len().min(3);
|
||||
|
||||
for win in words.windows(winsize) {
|
||||
|
@ -57,9 +57,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
.map(|s| s.iter().map(String::as_str).collect());
|
||||
let old_dictionary: Option<Vec<_>> =
|
||||
settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let del_builder =
|
||||
let mut del_builder =
|
||||
tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
|
||||
let del_tokenizer = del_builder.into_tokenizer();
|
||||
let del_tokenizer = del_builder.build();
|
||||
|
||||
let new_stop_words = settings_diff.new.stop_words.as_ref();
|
||||
let new_separators: Option<Vec<_>> = settings_diff
|
||||
@ -69,9 +69,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
.map(|s| s.iter().map(String::as_str).collect());
|
||||
let new_dictionary: Option<Vec<_>> =
|
||||
settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let add_builder =
|
||||
let mut add_builder =
|
||||
tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
|
||||
let add_tokenizer = add_builder.into_tokenizer();
|
||||
let add_tokenizer = add_builder.build();
|
||||
|
||||
// iterate over documents.
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
|
@ -45,7 +45,7 @@ fn contained_in(selector: &str, key: &str) -> bool {
|
||||
/// map_leaf_values(
|
||||
/// value.as_object_mut().unwrap(),
|
||||
/// ["jean.race.name"],
|
||||
/// |key, value| match (value, key) {
|
||||
/// |key, _array_indices, value| match (value, key) {
|
||||
/// (Value::String(name), "jean.race.name") => *name = "patou".to_string(),
|
||||
/// _ => unreachable!(),
|
||||
/// },
|
||||
@ -66,17 +66,18 @@ fn contained_in(selector: &str, key: &str) -> bool {
|
||||
pub fn map_leaf_values<'a>(
|
||||
value: &mut Map<String, Value>,
|
||||
selectors: impl IntoIterator<Item = &'a str>,
|
||||
mut mapper: impl FnMut(&str, &mut Value),
|
||||
mut mapper: impl FnMut(&str, &[usize], &mut Value),
|
||||
) {
|
||||
let selectors: Vec<_> = selectors.into_iter().collect();
|
||||
map_leaf_values_in_object(value, &selectors, "", &mut mapper);
|
||||
map_leaf_values_in_object(value, &selectors, "", &[], &mut mapper);
|
||||
}
|
||||
|
||||
pub fn map_leaf_values_in_object(
|
||||
value: &mut Map<String, Value>,
|
||||
selectors: &[&str],
|
||||
base_key: &str,
|
||||
mapper: &mut impl FnMut(&str, &mut Value),
|
||||
array_indices: &[usize],
|
||||
mapper: &mut impl FnMut(&str, &[usize], &mut Value),
|
||||
) {
|
||||
for (key, value) in value.iter_mut() {
|
||||
let base_key = if base_key.is_empty() {
|
||||
@ -94,12 +95,12 @@ pub fn map_leaf_values_in_object(
|
||||
if should_continue {
|
||||
match value {
|
||||
Value::Object(object) => {
|
||||
map_leaf_values_in_object(object, selectors, &base_key, mapper)
|
||||
map_leaf_values_in_object(object, selectors, &base_key, array_indices, mapper)
|
||||
}
|
||||
Value::Array(array) => {
|
||||
map_leaf_values_in_array(array, selectors, &base_key, mapper)
|
||||
map_leaf_values_in_array(array, selectors, &base_key, array_indices, mapper)
|
||||
}
|
||||
value => mapper(&base_key, value),
|
||||
value => mapper(&base_key, array_indices, value),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -109,13 +110,24 @@ pub fn map_leaf_values_in_array(
|
||||
values: &mut [Value],
|
||||
selectors: &[&str],
|
||||
base_key: &str,
|
||||
mapper: &mut impl FnMut(&str, &mut Value),
|
||||
base_array_indices: &[usize],
|
||||
mapper: &mut impl FnMut(&str, &[usize], &mut Value),
|
||||
) {
|
||||
for value in values.iter_mut() {
|
||||
// This avoids allocating twice
|
||||
let mut array_indices = Vec::with_capacity(base_array_indices.len() + 1);
|
||||
array_indices.extend_from_slice(base_array_indices);
|
||||
array_indices.push(0);
|
||||
|
||||
for (i, value) in values.iter_mut().enumerate() {
|
||||
*array_indices.last_mut().unwrap() = i;
|
||||
match value {
|
||||
Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper),
|
||||
Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper),
|
||||
value => mapper(base_key, value),
|
||||
Value::Object(object) => {
|
||||
map_leaf_values_in_object(object, selectors, base_key, &array_indices, mapper)
|
||||
}
|
||||
Value::Array(array) => {
|
||||
map_leaf_values_in_array(array, selectors, base_key, &array_indices, mapper)
|
||||
}
|
||||
value => mapper(base_key, &array_indices, value),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -743,12 +755,14 @@ mod tests {
|
||||
}
|
||||
});
|
||||
|
||||
map_leaf_values(value.as_object_mut().unwrap(), ["jean.race.name"], |key, value| {
|
||||
match (value, key) {
|
||||
map_leaf_values(
|
||||
value.as_object_mut().unwrap(),
|
||||
["jean.race.name"],
|
||||
|key, _, value| match (value, key) {
|
||||
(Value::String(name), "jean.race.name") => *name = S("patou"),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
value,
|
||||
@ -775,7 +789,7 @@ mod tests {
|
||||
});
|
||||
|
||||
let mut calls = 0;
|
||||
map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| {
|
||||
map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, _, value| {
|
||||
calls += 1;
|
||||
match (value, key) {
|
||||
(Value::String(name), "jean.race.name") => *name = S("patou"),
|
||||
@ -798,4 +812,52 @@ mod tests {
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn map_array() {
|
||||
let mut value: Value = json!({
|
||||
"no_array": "peter",
|
||||
"simple": ["foo", "bar"],
|
||||
"nested": [
|
||||
{
|
||||
"a": [
|
||||
["cat", "dog"],
|
||||
["fox", "bear"],
|
||||
],
|
||||
"b": "hi",
|
||||
},
|
||||
{
|
||||
"a": ["green", "blue"],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
map_leaf_values(
|
||||
value.as_object_mut().unwrap(),
|
||||
["no_array", "simple", "nested"],
|
||||
|_key, array_indices, value| {
|
||||
*value = format!("{array_indices:?}").into();
|
||||
},
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
value,
|
||||
json!({
|
||||
"no_array": "[]",
|
||||
"simple": ["[0]", "[1]"],
|
||||
"nested": [
|
||||
{
|
||||
"a": [
|
||||
["[0, 0, 0]", "[0, 0, 1]"],
|
||||
["[0, 1, 0]", "[0, 1, 1]"],
|
||||
],
|
||||
"b": "[0]",
|
||||
},
|
||||
{
|
||||
"a": ["[1, 0]", "[1, 1]"],
|
||||
},
|
||||
],
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user