From 6da54d0cb6648fdceb32b924064fcd94c1b448e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Thu, 9 Mar 2023 14:56:13 +0100
Subject: [PATCH 1/2] Add a test to fix a diacritic issue

---
 meilisearch/tests/search/mod.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs
index 9a51be676..1e5c23a71 100644
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -192,6 +192,31 @@ async fn test_kanji_language_detection() {
         .await;
 }
 
+#[cfg(feature = "default")]
+#[actix_rt::test]
+async fn test_thai_language() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    // Strictly speaking we don't need documents: the issue is on the query side only.
+    let documents = json!([
+        { "id": 0, "title": "สบู่สมุนไพรดอกดาวเรือง 100 กรัม จำนวน 6 ก้อน" },
+        { "id": 1, "title": "สบู่สมุนไพรชาเขียว 100 กรัม จำนวน 6 ก้อน" },
+        { "id": 2, "title": "สบู่สมุนไพรฝางแดงผสมว่านหางจรเข้ 100 กรัม จำนวน 6 ก้อน" }
+    ]);
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    index.update_settings(json!({"rankingRules": ["exactness"]})).await;
+    index.wait_task(1).await;
+
+    index
+        .search(json!({"q": "สบู"}), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+        })
+        .await;
+}
+
 #[actix_rt::test]
 async fn search_multiple_params() {
     let server = Server::new().await;

From 175e8a84959f79d9ac38a941610edaa8fb3ee795 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Thu, 9 Mar 2023 14:57:30 +0100
Subject: [PATCH 2/2] Fix a diacritic issue

---
 milli/src/search/query_tree.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs
index 541dd8f7a..50f16c2d9 100755
--- a/milli/src/search/query_tree.rs
+++ b/milli/src/search/query_tree.rs
@@ -792,6 +792,10 @@ where
 
     let mut peekable = query.peekable();
     while let Some(token) = peekable.next() {
+        if token.lemma().is_empty() {
+            continue;
+        }
+
         // early return if word limit is exceeded
         if primitive_query.len() >= parts_limit {
             return primitive_query;