diff --git a/Cargo.lock b/Cargo.lock
index bb6105741..e8747dc7a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -709,7 +709,6 @@ dependencies = [
  "csv",
  "deunicode",
  "either",
- "finl_unicode",
  "fst",
  "irg-kvariants",
  "jieba-rs",
@@ -1443,12 +1442,6 @@ dependencies = [
  "nom_locate",
 ]
 
-[[package]]
-name = "finl_unicode"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6"
-
 [[package]]
 name = "flate2"
 version = "1.0.26"
diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs
index e6eae7cb1..9c80aed31 100644
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -876,3 +876,234 @@ async fn experimental_feature_vector_store() {
     meili_snap::snapshot!(code, @"200 OK");
     meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
 }
+
+// Checks that camelCased titles ("DeLonghi", "TestAB") are retrieved exactly like
+// their lowercase spelling, whatever the casing of the query.
+#[cfg(feature = "default")]
+#[actix_rt::test]
+async fn camelcased_words() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    // related to https://github.com/meilisearch/meilisearch/issues/3818
+    let documents = json!([
+        { "id": 0, "title": "DeLonghi" },
+        { "id": 1, "title": "delonghi" },
+        { "id": 2, "title": "TestAB" },
+        { "id": 3, "title": "TestAb" },
+        { "id": 4, "title": "testab" },
+    ]);
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    // without typos: only the casing of the query changes
+    index
+        .search(json!({"q": "deLonghi"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 0,
+                "title": "DeLonghi"
+              },
+              {
+                "id": 1,
+                "title": "delonghi"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "delonghi"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 0,
+                "title": "DeLonghi"
+              },
+              {
+                "id": 1,
+                "title": "delonghi"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    // "testa" is shorter than the indexed word; every "testab" spelling is still expected
+    index
+        .search(json!({"q": "testa"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 2,
+                "title": "TestAB"
+              },
+              {
+                "id": 3,
+                "title": "TestAb"
+              },
+              {
+                "id": 4,
+                "title": "testab"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "testab"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 2,
+                "title": "TestAB"
+              },
+              {
+                "id": 3,
+                "title": "TestAb"
+              },
+              {
+                "id": 4,
+                "title": "testab"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "TestaB"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 2,
+                "title": "TestAB"
+              },
+              {
+                "id": 3,
+                "title": "TestAb"
+              },
+              {
+                "id": 4,
+                "title": "testab"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "Testab"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 2,
+                "title": "TestAB"
+              },
+              {
+                "id": 3,
+                "title": "TestAb"
+              },
+              {
+                "id": 4,
+                "title": "testab"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "TestAb"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 2,
+                "title": "TestAB"
+              },
+              {
+                "id": 3,
+                "title": "TestAb"
+              },
+              {
+                "id": 4,
+                "title": "testab"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    // with Typos
+    index
+        .search(json!({"q": "dellonghi"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 0,
+                "title": "DeLonghi"
+              },
+              {
+                "id": 1,
+                "title": "delonghi"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "TetsAB"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 2,
+                "title": "TestAB"
+              },
+              {
+                "id": 3,
+                "title": "TestAb"
+              },
+              {
+                "id": 4,
+                "title": "testab"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "TetsAb"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 2,
+                "title": "TestAB"
+              },
+              {
+                "id": 3,
+                "title": "TestAb"
+              },
+              {
+                "id": 4,
+                "title": "testab"
+              }
+            ]
+            "###);
+        })
+        .await;
+}
diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs
index b589ccfb7..219c747ed 100644
--- a/meilisearch/tests/search/restrict_searchable.rs
+++ b/meilisearch/tests/search/restrict_searchable.rs
@@ -310,7 +310,7 @@ async fn exactness_ranking_rule_order() {
         },
         {
             "title": "Captain Marvel",
-            "desc": "CaptainMarvel",
+            "desc": "Captain the Marvel",
             "id": "2",
         }]),
     )
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index bc1d9b7ee..854d29141 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -81,7 +81,7 @@ md5 = "0.7.0"
 rand = { version = "0.8.5", features = ["small_rng"] }
 
 [features]
-all-tokenizations = ["charabia/default"]
+all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
 
 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml