From ad35edfa328452b39488e00b356b60ae51331f67 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Wed, 22 Feb 2023 15:47:15 +0100
Subject: [PATCH 1/2] Add test

---
 meilisearch/tests/search/formatted.rs | 34 +++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/meilisearch/tests/search/formatted.rs b/meilisearch/tests/search/formatted.rs
index 191720602..076c14fe0 100644
--- a/meilisearch/tests/search/formatted.rs
+++ b/meilisearch/tests/search/formatted.rs
@@ -442,3 +442,37 @@ async fn displayedattr_2_smol() {
         )
         .await;
 }
+
+#[cfg(feature = "default")]
+#[actix_rt::test]
+async fn test_cjk_highlight() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = json!([
+        { "id": 0, "title": "この度、クーポンで無料で頂きました。" },
+        { "id": 1, "title": "大卫到了扫罗那里" },
+    ]);
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    index
+        .search(json!({"q": "で", "attributesToHighlight": ["title"]}), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            assert_eq!(
+                response["hits"][0]["_formatted"]["title"],
+                json!("この度、クーポン<em>で</em>無料<em>で</em>頂きました。")
+            );
+        })
+        .await;
+
+    index
+        .search(json!({"q": "大卫", "attributesToHighlight": ["title"]}), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            assert_eq!(
+                response["hits"][0]["_formatted"]["title"],
+                json!("<em>大卫</em>到了扫罗那里")
+            );
+        })
+        .await;
+}

From 28d6a4466d7d165008f8bd96c1624ad3cb3ceb16 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Wed, 22 Feb 2023 17:43:10 +0100
Subject: [PATCH 2/2] Make the tokenizer create a char map during highlighting

---
 meilisearch/src/search.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs
index f48563141..6621a494a 100644
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -243,9 +243,10 @@ pub fn perform_search(
         &displayed_ids,
     );
 
-    let tokenizer = TokenizerBuilder::default().build();
+    let mut tokenizer_builder = TokenizerBuilder::default();
+    tokenizer_builder.create_char_map(true);
 
-    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
+    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
     formatter_builder.crop_marker(query.crop_marker);
     formatter_builder.highlight_prefix(query.highlight_pre_tag);
     formatter_builder.highlight_suffix(query.highlight_post_tag);