diff --git a/crates/meilisearch/tests/search/formatted.rs b/crates/meilisearch/tests/search/formatted.rs
index ee33939fd..5ded39976 100644
--- a/crates/meilisearch/tests/search/formatted.rs
+++ b/crates/meilisearch/tests/search/formatted.rs
@@ -4,6 +4,58 @@ use super::*;
use crate::common::Server;
use crate::json;
+/// Searches `prince` with cropping, highlighting and a `genre = adventure` filter,
+/// then checks that the first hit is "Le Petit Prince" with its formatted title
+/// cropped to "…Petit Prince".
+#[actix_rt::test]
+async fn search_formatted_from_sdk() {
+    let server = Server::new_shared();
+    let index = server.unique_index();
+
+    // Declares `genre` as filterable (the query below filters on it) and `title`
+    // as the only searchable attribute.
+    let settings = json!({ "filterableAttributes": ["genre"], "searchableAttributes": ["title"] });
+    index.update_settings(settings).await;
+
+    // The last document deliberately carries no `genre` field.
+    let books = json!([
+        { "id": 123, "title": "Pride and Prejudice", "genre": "romance" },
+        { "id": 456, "title": "Le Petit Prince", "genre": "adventure" },
+        { "id": 1, "title": "Alice In Wonderland", "genre": "adventure" },
+        { "id": 2, "title": "Le Rouge et le Noir", "genre": "romance" },
+        { "id": 1344, "title": "The Hobbit", "genre": "adventure" },
+        { "id": 4, "title": "Harry Potter and the Half-Blood Prince", "genre": "fantasy" },
+        { "id": 7, "title": "Harry Potter and the Chamber of Secrets", "genre": "fantasy" },
+        { "id": 42, "title": "The Hitchhiker's Guide to the Galaxy" }
+    ]);
+    let (task, _) = index.add_documents(books, None).await;
+    index.wait_task(task.uid()).await;
+
+    let query = json!({
+        "q": "prince",
+        "attributesToCrop": ["title"],
+        "cropLength": 2,
+        "filter": "genre = adventure",
+        "attributesToHighlight": ["title"],
+        "attributesToRetrieve": ["title"]
+    });
+    index
+        .search(query, |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            // Only the first hit is pinned by the snapshot.
+            allow_duplicates! {
+                assert_json_snapshot!(response["hits"][0],
+                    { "._rankingScore" => "[score]" },
+                    @r###"
+                {
+                  "title": "Le Petit Prince",
+                  "_formatted": {
+                    "title": "…Petit Prince"
+                  }
+                }
+                "###);
+            }
+        })
+        .await;
+}
+
#[actix_rt::test]
async fn formatted_contain_wildcard() {
let server = Server::new_shared();
diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs
index d1091d944..f3c11e451 100644
--- a/crates/meilisearch/tests/search/mod.rs
+++ b/crates/meilisearch/tests/search/mod.rs
@@ -15,6 +15,7 @@ mod pagination;
mod restrict_searchable;
mod search_queue;
+use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use tempfile::TempDir;
@@ -62,6 +63,71 @@ async fn simple_search() {
.await;
}
+/// Regression test for queries made only of configured stop words.
+///
+/// Both queries below consist of `to` and `the`, which are registered as stop
+/// words; they differ only by a trailing space.
+#[actix_rt::test]
+async fn search_with_stop_word() {
+    // related to https://github.com/meilisearch/meilisearch/issues/4984
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let (_, code) = index
+        .update_settings(json!({"stopWords": ["the", "The", "a", "an", "to", "in", "of"]}))
+        .await;
+    snapshot!(code, @"202 Accepted");
+
+    // Wait on the uid reported for the document addition instead of assuming
+    // the task is number 1.
+    let documents = DOCUMENTS.clone();
+    let (task, _) = index.add_documents(documents, None).await;
+    index.wait_task(task.uid()).await;
+
+    // prefix search: no trailing space after the last word, no hit is expected
+    index
+        .search(
+            json!({"q": "to the", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                snapshot!(json_string!(response["hits"]), @"[]");
+            },
+        )
+        .await;
+
+    // non-prefix search: trailing space after the last word; the expected hits
+    // come back with `_formatted.title` identical to `title` (nothing highlighted)
+    index
+        .search(
+            json!({"q": "to the ", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                snapshot!(json_string!(response["hits"]), @r###"
+                [
+                  {
+                    "title": "Shazam!",
+                    "_formatted": {
+                      "title": "Shazam!"
+                    }
+                  },
+                  {
+                    "title": "Captain Marvel",
+                    "_formatted": {
+                      "title": "Captain Marvel"
+                    }
+                  },
+                  {
+                    "title": "Escape Room",
+                    "_formatted": {
+                      "title": "Escape Room"
+                    }
+                  },
+                  {
+                    "title": "How to Train Your Dragon: The Hidden World",
+                    "_formatted": {
+                      "title": "How to Train Your Dragon: The Hidden World"
+                    }
+                  },
+                  {
+                    "title": "Gläss",
+                    "_formatted": {
+                      "title": "Gläss"
+                    }
+                  }
+                ]
+                "###);
+            },
+        )
+        .await;
+}
+
#[actix_rt::test]
async fn phrase_search_with_stop_word() {
// related to https://github.com/meilisearch/meilisearch/issues/3521
diff --git a/crates/meilisearch/tests/search/restrict_searchable.rs b/crates/meilisearch/tests/search/restrict_searchable.rs
index ca659c518..abd13fadf 100644
--- a/crates/meilisearch/tests/search/restrict_searchable.rs
+++ b/crates/meilisearch/tests/search/restrict_searchable.rs
@@ -367,3 +367,50 @@ async fn search_on_exact_field() {
})
.await;
}
+
+/// Runs the quoted phrase `"Document Review"` with `attributesToSearchOn`
+/// limited to `title` and checks which titles come back.
+///
+/// Every document has the phrase in `desc`, so the snapshot only holds if the
+/// phrase is matched against `title` alone.
+#[actix_rt::test]
+async fn phrase_search_on_title() {
+    let server = Server::new().await;
+
+    // Ids 1 and 2 are the only documents whose title lacks the exact phrase.
+    let jobs = json!([
+        { "id": 8, "desc": "Document Review", "title": "Document Review Specialist II" },
+        { "id": 5, "desc": "Document Review", "title": "Document Review Attorney" },
+        { "id": 4, "desc": "Document Review", "title": "Document Review Manager - Cyber Incident Response (Remote)" },
+        { "id": 3, "desc": "Document Review", "title": "Document Review Paralegal" },
+        { "id": 2, "desc": "Document Review", "title": "Document Controller (Saudi National)" },
+        { "id": 1, "desc": "Document Review", "title": "Document Reviewer" },
+        { "id": 7, "desc": "Document Review", "title": "Document Review Specialist II" },
+        { "id": 6, "desc": "Document Review", "title": "Document Review (Entry Level)" }
+    ]);
+    let index = index_with_documents(&server, &jobs).await;
+
+    let query = json!({
+        "q": "\"Document Review\"",
+        "attributesToSearchOn": ["title"],
+        "attributesToRetrieve": ["title"]
+    });
+    index
+        .search(query, |response, code| {
+            snapshot!(code, @"200 OK");
+            // Six hits are expected: all titles except those of ids 1 and 2.
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "Document Review Specialist II"
+              },
+              {
+                "title": "Document Review Attorney"
+              },
+              {
+                "title": "Document Review Manager - Cyber Incident Response (Remote)"
+              },
+              {
+                "title": "Document Review Paralegal"
+              },
+              {
+                "title": "Document Review Specialist II"
+              },
+              {
+                "title": "Document Review (Entry Level)"
+              }
+            ]
+            "###);
+        })
+        .await;
+}
diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs
index 7d8d25502..b84079bcf 100644
--- a/crates/milli/src/search/new/matches/mod.rs
+++ b/crates/milli/src/search/new/matches/mod.rs
@@ -275,7 +275,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
last_match_last_token_position_plus_one
} else {
// we have matched the end of possible tokens, there's nothing to advance
- tokens.len() - 1
+ tokens.len()
}
};
diff --git a/crates/milli/src/search/new/resolve_query_graph.rs b/crates/milli/src/search/new/resolve_query_graph.rs
index 7a47b0a66..4496f8c65 100644
--- a/crates/milli/src/search/new/resolve_query_graph.rs
+++ b/crates/milli/src/search/new/resolve_query_graph.rs
@@ -193,15 +193,23 @@ pub fn compute_phrase_docids(
if words.is_empty() {
return Ok(RoaringBitmap::new());
}
- let mut candidates = RoaringBitmap::new();
+ let mut candidates = None;
for word in words.iter().flatten().copied() {
if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
- candidates |= word_docids;
+ if let Some(candidates) = candidates.as_mut() {
+ *candidates &= word_docids;
+ } else {
+ candidates = Some(word_docids);
+ }
} else {
return Ok(RoaringBitmap::new());
}
}
+ let Some(mut candidates) = candidates else {
+ return Ok(RoaringBitmap::new());
+ };
+
let winsize = words.len().min(3);
for win in words.windows(winsize) {
diff --git a/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index ba11ceeb3..16ea92fa4 100644
--- a/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -57,9 +57,9 @@ pub fn extract_docid_word_positions(
.map(|s| s.iter().map(String::as_str).collect());
let old_dictionary: Option<Vec<_>> =
settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
- let del_builder =
+ let mut del_builder =
tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
- let del_tokenizer = del_builder.into_tokenizer();
+ let del_tokenizer = del_builder.build();
let new_stop_words = settings_diff.new.stop_words.as_ref();
let new_separators: Option<Vec<_>> = settings_diff
@@ -69,9 +69,9 @@ pub fn extract_docid_word_positions(
.map(|s| s.iter().map(String::as_str).collect());
let new_dictionary: Option<Vec<_>> =
settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
- let add_builder =
+ let mut add_builder =
tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
- let add_tokenizer = add_builder.into_tokenizer();
+ let add_tokenizer = add_builder.build();
// iterate over documents.
let mut cursor = obkv_documents.into_cursor()?;