diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml
index 6d23f144a..a15fc01f8 100644
--- a/meilisearch-types/Cargo.toml
+++ b/meilisearch-types/Cargo.toml
@@ -49,6 +49,8 @@ chinese-pinyin = ["milli/chinese-pinyin"]
 hebrew = ["milli/hebrew"]
 # japanese specialized tokenization
 japanese = ["milli/japanese"]
+# korean specialized tokenization
+korean = ["milli/korean"]
 # thai specialized tokenization
 thai = ["milli/thai"]
 # allow greek specialized tokenization
diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml
index 75962c450..eacd3dd8e 100644
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -151,6 +151,7 @@ chinese = ["meilisearch-types/chinese"]
 chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
 hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]
+korean = ["meilisearch-types/korean"]
 thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]
diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs
index 19d5ff358..443e4b9c1 100644
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -336,4 +336,34 @@ mod test {
 
         assert_eq!(documents_ids, vec![1]);
     }
+
+    /// Indexes one English, one Hangul and one Hebrew document, then
+    /// checks that searching for the Hangul query "김밥" returns only the
+    /// Hangul document (id 1).
+    ///
+    /// Compiled only when the `korean` feature is enabled.
+    #[cfg(feature = "korean")]
+    #[test]
+    fn test_hangul_language_detection() {
+        use crate::index::tests::TempIndex;
+
+        let index = TempIndex::new();
+
+        index
+            .add_documents(documents!([
+                { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
+                { "id": 1, "title": "김밥먹을래。" },
+                { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
+            ]))
+            .unwrap();
+
+        // Searching only reads the index, so a read transaction is sufficient.
+        let rtxn = index.read_txn().unwrap();
+        let mut search = Search::new(&rtxn, &index);
+
+        search.query("김밥");
+        let SearchResult { documents_ids, .. } = search.execute().unwrap();
+
+        assert_eq!(documents_ids, vec![1]);
+    }
 }