5062: Fix bugs for v1.12 r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #4984
Fixes https://github.com/meilisearch/meilisearch/issues/4974
Fixes [SDK test](https://github.com/meilisearch/meilisearch/actions/runs/11886701996/job/33118278794)
## What does this PR do?
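- Add 3 regression tests: `search_formatted_from_sdk`, `search_with_stop_word` and `phrase_search_on_title`
- Fix bugs:
  - in the matcher, fall back to `tokens.len()` instead of `tokens.len() - 1` once the end of the possible tokens has been matched
  - in `compute_phrase_docids`, intersect the docids of the phrase words instead of taking their union
  - in `extract_docid_word_positions`, create the tokenizers with `build()` instead of `into_tokenizer()`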

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-11-26 08:10:50 +00:00 committed by GitHub
commit e241f91285
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 180 additions and 7 deletions

View File

@ -4,6 +4,58 @@ use super::*;
use crate::common::Server; use crate::common::Server;
use crate::json; use crate::json;
/// Checks cropping and highlighting on a filtered search.
///
/// Indexes a small set of books, searches for `prince` among the `adventure`
/// titles with a crop length of 2, and expects the first hit to be
/// "Le Petit Prince" with its formatted title cropped and highlighted.
/// The name suggests the scenario was taken from an SDK test suite.
#[actix_rt::test]
async fn search_formatted_from_sdk() {
let server = Server::new_shared();
let index = server.unique_index();
// Only `genre` can be used in filters and only `title` is searched.
index
.update_settings(
json!({ "filterableAttributes": ["genre"], "searchableAttributes": ["title"] }),
)
.await;
// The last document (id 42) deliberately has no `genre` field.
let documents = json!([
{ "id": 123, "title": "Pride and Prejudice", "genre": "romance" },
{ "id": 456, "title": "Le Petit Prince", "genre": "adventure" },
{ "id": 1, "title": "Alice In Wonderland", "genre": "adventure" },
{ "id": 2, "title": "Le Rouge et le Noir", "genre": "romance" },
{ "id": 1344, "title": "The Hobbit", "genre": "adventure" },
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "genre": "fantasy" },
{ "id": 7, "title": "Harry Potter and the Chamber of Secrets", "genre": "fantasy" },
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy" }
]);
// Wait for the indexing task returned by the enqueue call before searching.
let (response, _) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;
// Crop and highlight `title`, keeping only two words around the match.
index
.search(
json!({ "q":"prince",
"attributesToCrop": ["title"],
"cropLength": 2,
"filter": "genre = adventure",
"attributesToHighlight": ["title"],
"attributesToRetrieve": ["title"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
// NOTE(review): `allow_duplicates!` presumably exists because this
// callback is evaluated more than once by `search` — confirm.
allow_duplicates! {
// Redact `_rankingScore`, should it be present, so the snapshot
// does not depend on the score value.
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"title": "Le Petit Prince",
"_formatted": {
"title": "…Petit <em>Prince</em>"
}
}
"###);
}
},
)
.await;
}
#[actix_rt::test] #[actix_rt::test]
async fn formatted_contain_wildcard() { async fn formatted_contain_wildcard() {
let server = Server::new_shared(); let server = Server::new_shared();

View File

@ -15,6 +15,7 @@ mod pagination;
mod restrict_searchable; mod restrict_searchable;
mod search_queue; mod search_queue;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt; use meilisearch::Opt;
use tempfile::TempDir; use tempfile::TempDir;
@ -62,6 +63,71 @@ async fn simple_search() {
.await; .await;
} }
/// Checks that a query made only of stop words does not break the search.
///
/// With `to` and `the` configured as stop words:
/// - `"to the"` (last word is searched as a prefix) must return no hit;
/// - `"to the "` (trailing space, so no prefix search) must return documents
///   whose formatted title carries no highlight markers.
#[actix_rt::test]
async fn search_with_stop_word() {
// related to https://github.com/meilisearch/meilisearch/issues/4984
let server = Server::new().await;
let index = server.index("test");
let (_, code) = index
.update_settings(json!({"stopWords": ["the", "The", "a", "an", "to", "in", "of"]}))
.await;
meili_snap::snapshot!(code, @"202 Accepted");
let documents = DOCUMENTS.clone();
// Wait on the uid returned by the enqueue call rather than on a hard-coded
// task id, so the test does not depend on how many tasks were queued before.
let (response, _) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;
// prefix search
index
.search(json!({"q": "to the", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @"[]");
})
.await;
// non-prefix search
index
.search(json!({"q": "to the ", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
// None of the titles is highlighted, not even the one containing "to" and "The".
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Shazam!",
"_formatted": {
"title": "Shazam!"
}
},
{
"title": "Captain Marvel",
"_formatted": {
"title": "Captain Marvel"
}
},
{
"title": "Escape Room",
"_formatted": {
"title": "Escape Room"
}
},
{
"title": "How to Train Your Dragon: The Hidden World",
"_formatted": {
"title": "How to Train Your Dragon: The Hidden World"
}
},
{
"title": "Gläss",
"_formatted": {
"title": "Gläss"
}
}
]
"###);
})
.await;
}
#[actix_rt::test] #[actix_rt::test]
async fn phrase_search_with_stop_word() { async fn phrase_search_with_stop_word() {
// related to https://github.com/meilisearch/meilisearch/issues/3521 // related to https://github.com/meilisearch/meilisearch/issues/3521

View File

@ -367,3 +367,50 @@ async fn search_on_exact_field() {
}) })
.await; .await;
} }
/// Checks that a phrase search restricted to `title` only returns documents
/// whose title contains the phrase.
///
/// Every document has `desc` equal to "Document Review", but two titles
/// ("Document Controller (Saudi National)" and "Document Reviewer") do not
/// contain the exact phrase and must be absent from the hits.
#[actix_rt::test]
async fn phrase_search_on_title() {
let server = Server::new().await;
// `desc` always matches the phrase; only `title` discriminates the documents.
let documents = json!([
{ "id": 8, "desc": "Document Review", "title": "Document Review Specialist II" },
{ "id": 5, "desc": "Document Review", "title": "Document Review Attorney" },
{ "id": 4, "desc": "Document Review", "title": "Document Review Manager - Cyber Incident Response (Remote)" },
{ "id": 3, "desc": "Document Review", "title": "Document Review Paralegal" },
{ "id": 2, "desc": "Document Review", "title": "Document Controller (Saudi National)" },
{ "id": 1, "desc": "Document Review", "title": "Document Reviewer" },
{ "id": 7, "desc": "Document Review", "title": "Document Review Specialist II" },
{ "id": 6, "desc": "Document Review", "title": "Document Review (Entry Level)" }
]);
// NOTE(review): `index_with_documents` is defined outside this view; it
// presumably creates an index and waits for the documents to be indexed — confirm.
let index = index_with_documents(&server, &documents).await;
// Quoted query = phrase search; `attributesToSearchOn` limits it to `title`.
index
.search(
json!({"q": "\"Document Review\"", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["title"]}),
|response, code| {
snapshot!(code, @"200 OK");
// Six of the eight documents are expected.
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Document Review Specialist II"
},
{
"title": "Document Review Attorney"
},
{
"title": "Document Review Manager - Cyber Incident Response (Remote)"
},
{
"title": "Document Review Paralegal"
},
{
"title": "Document Review Specialist II"
},
{
"title": "Document Review (Entry Level)"
}
]
"###);
},
)
.await;
}

View File

@ -274,7 +274,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
last_match_last_token_position_plus_one last_match_last_token_position_plus_one
} else { } else {
// we have matched the end of possible tokens, there's nothing to advance // we have matched the end of possible tokens, there's nothing to advance
tokens.len() - 1 tokens.len()
} }
}; };

View File

@ -193,15 +193,23 @@ pub fn compute_phrase_docids(
if words.is_empty() { if words.is_empty() {
return Ok(RoaringBitmap::new()); return Ok(RoaringBitmap::new());
} }
let mut candidates = RoaringBitmap::new(); let mut candidates = None;
for word in words.iter().flatten().copied() { for word in words.iter().flatten().copied() {
if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? { if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
candidates |= word_docids; if let Some(candidates) = candidates.as_mut() {
*candidates &= word_docids;
} else {
candidates = Some(word_docids);
}
} else { } else {
return Ok(RoaringBitmap::new()); return Ok(RoaringBitmap::new());
} }
} }
let Some(mut candidates) = candidates else {
return Ok(RoaringBitmap::new());
};
let winsize = words.len().min(3); let winsize = words.len().min(3);
for win in words.windows(winsize) { for win in words.windows(winsize) {

View File

@ -58,9 +58,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.map(|s| s.iter().map(String::as_str).collect()); .map(|s| s.iter().map(String::as_str).collect());
let old_dictionary: Option<Vec<_>> = let old_dictionary: Option<Vec<_>> =
settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let del_builder = let mut del_builder =
tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref()); tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
let del_tokenizer = del_builder.into_tokenizer(); let del_tokenizer = del_builder.build();
let new_stop_words = settings_diff.new.stop_words.as_ref(); let new_stop_words = settings_diff.new.stop_words.as_ref();
let new_separators: Option<Vec<_>> = settings_diff let new_separators: Option<Vec<_>> = settings_diff
@ -70,9 +70,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.map(|s| s.iter().map(String::as_str).collect()); .map(|s| s.iter().map(String::as_str).collect());
let new_dictionary: Option<Vec<_>> = let new_dictionary: Option<Vec<_>> =
settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let add_builder = let mut add_builder =
tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref()); tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
let add_tokenizer = add_builder.into_tokenizer(); let add_tokenizer = add_builder.build();
// iterate over documents. // iterate over documents.
let mut cursor = obkv_documents.into_cursor()?; let mut cursor = obkv_documents.into_cursor()?;