4665: Add missing Korean support r=ManyTheFish a=junhochoi

Some configuration files are missing the `korean` feature; this PR adds it and adds a test case in `milli/src/search/mod.rs`.

# Pull Request

## Related issue

#3443 #3882 

## What does this PR do?
- Improvement on enabling Korean support

Inspired by the work in #3882, I tried to enable the Korean features but found some missing configuration.
This PR adds those missing configs (mostly in Cargo.toml) and adds one test case.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Junho Choi <jh.choi@catenoid.net>
This commit is contained in:
meili-bors[bot] 2024-06-25 11:51:21 +00:00 committed by GitHub
commit 3c4c46377b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 27 additions and 0 deletions

View File

@ -49,6 +49,8 @@ chinese-pinyin = ["milli/chinese-pinyin"]
hebrew = ["milli/hebrew"] hebrew = ["milli/hebrew"]
# japanese specialized tokenization # japanese specialized tokenization
japanese = ["milli/japanese"] japanese = ["milli/japanese"]
# korean specialized tokenization
korean = ["milli/korean"]
# thai specialized tokenization # thai specialized tokenization
thai = ["milli/thai"] thai = ["milli/thai"]
# allow greek specialized tokenization # allow greek specialized tokenization

View File

@ -151,6 +151,7 @@ chinese = ["meilisearch-types/chinese"]
chinese-pinyin = ["meilisearch-types/chinese-pinyin"] chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
hebrew = ["meilisearch-types/hebrew"] hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"] japanese = ["meilisearch-types/japanese"]
korean = ["meilisearch-types/korean"]
thai = ["meilisearch-types/thai"] thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"] greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"] khmer = ["meilisearch-types/khmer"]

View File

@ -336,4 +336,28 @@ mod test {
assert_eq!(documents_ids, vec![1]); assert_eq!(documents_ids, vec![1]);
} }
// Only compiled when the `korean` feature is enabled.
// Indexes one English, one Hangul and one Hebrew document, then checks that
// a Hangul query returns only the Hangul document (id 1).
#[cfg(feature = "korean")]
#[test]
fn test_hangul_language_detection() {
    use crate::index::tests::TempIndex;

    let temp_index = TempIndex::new();

    // Three documents in three different scripts; only id 1 contains Hangul.
    let mixed_script_docs = documents!([
        { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
        { "id": 1, "title": "김밥먹을래。" },
        { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
    ]);
    temp_index.add_documents(mixed_script_docs).unwrap();

    let wtxn = temp_index.write_txn().unwrap();
    let mut hangul_search = Search::new(&wtxn, &temp_index);
    // The query is a prefix of the title stored in document 1.
    hangul_search.query("김밥");

    let matched_ids = hangul_search.execute().unwrap().documents_ids;
    assert_eq!(matched_ids, vec![1]);
}
} }