From c2ab7a793918f5d538ab1e6006f9793003ece5ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar=20-=20curqui?= Date: Wed, 14 Sep 2022 14:40:36 +0200 Subject: [PATCH 1/3] Update config.yml --- .github/ISSUE_TEMPLATE/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 1006a064d..3f6cb9462 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,13 +1,13 @@ contact_links: + - name: Support questions & other + url: https://github.com/meilisearch/meilisearch/discussions/new + about: For any other question, open a discussion in this repository - name: Language support request & feedback url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal?discussions_q=label%3Aproduct%3Acore%3Atokenizer+category%3A%22Feedback+%26+Feature+Proposal%22 about: The requests and feedback regarding Language support are not managed in this repository. Please upvote the related discussion in our dedicated product repository or open a new one if it doesn't exist. - - name: Feature request & feedback + - name: Any other feature request & feedback url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal about: The feature requests and feedback regarding the already existing features are not managed in this repository. Please open a discussion in our dedicated product repository - name: Documentation issue url: https://github.com/meilisearch/documentation/issues/new about: For documentation issues, open an issue or a PR in the documentation repository - - name: Support questions & other - url: https://github.com/meilisearch/meilisearch/discussions/new - about: For any other question, open a discussion in this repository From 5b571147718d37f31ccc86e88f90d3d1894ece1b Mon Sep 17 00:00:00 2001 From: Jakub Jirutka Date: Wed, 14 Sep 2022 20:52:11 +0200 Subject: [PATCH 2/3] Bump milli from 0.33.0 to 0.33.4 --- Cargo.lock | 16 ++++++++-------- meilisearch-auth/Cargo.toml | 2 +- meilisearch-lib/Cargo.toml | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index babbd0ab2..7a0802e3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1119,8 +1119,8 @@ dependencies = [ [[package]] name = "filter-parser" -version = "0.33.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5" +version = "0.33.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" dependencies = [ "nom", "nom_locate", @@ -1144,8 +1144,8 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "0.33.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5" +version = "0.33.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" dependencies = [ "serde_json", ] @@ -1657,8 +1657,8 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "0.33.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5" +version = "0.33.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" dependencies = [ "serde_json", ] @@ -2195,8 +2195,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.33.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5" +version = "0.33.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" dependencies = [ "bimap", "bincode", diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index 4504180b4..470d5b8d1 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" enum-iterator = "0.7.0" hmac = "0.12.1" meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } rand = "0.8.4" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.85", features = ["preserve_order"] } diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 74c46979e..de967286c 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -28,7 +28,7 @@ lazy_static = "1.4.0" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" From 935f18efcfbad0196eb615bebfe7a71b7ea76bd4 Mon Sep 17 00:00:00 2001 From: Jakub Jirutka Date: Wed, 14 Sep 2022 20:57:13 +0200 Subject: [PATCH 3/3] Allow building without specialized tokenizations (Some of) these specialized tokenizations include huge dictionaries that currently account for 90% (!) of the meilisearch binary size. This commit adds chinese, hebrew, japanese, and thai feature flags that are propagated via milli down to the charabia crate. To keep it backward compatible, they are enabled by default. Related to meilisearch/milli#632 --- meilisearch-auth/Cargo.toml | 2 +- meilisearch-http/Cargo.toml | 8 ++++++-- meilisearch-lib/Cargo.toml | 18 +++++++++++++++++- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index 470d5b8d1..3bbc09c4a 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" enum-iterator = "0.7.0" hmac = "0.12.1" meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false } rand = "0.8.4" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.85", features = ["preserve_order"] } diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 38f9a83fc..baea8b578 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -46,7 +46,7 @@ jsonwebtoken = "8.0.1" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -meilisearch-lib = { path = "../meilisearch-lib" } +meilisearch-lib = { path = "../meilisearch-lib", default-features = false } mimalloc = { version = "0.1.29", default-features = false } mime = "0.3.16" num_cpus = "1.13.1" @@ -90,7 +90,7 @@ urlencoding = "2.1.0" yaup = "0.2.0" [features] -default = ["analytics", "mini-dashboard"] +default = ["analytics", "meilisearch-lib/default", "mini-dashboard"] metrics = ["prometheus"] analytics = ["segment"] mini-dashboard = [ @@ -104,6 +104,10 @@ mini-dashboard = [ "tempfile", "zip", ] +chinese = ["meilisearch-lib/chinese"] +hebrew = ["meilisearch-lib/hebrew"] +japanese = ["meilisearch-lib/japanese"] +thai = ["meilisearch-lib/thai"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.1/build.zip" diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index de967286c..bda3ecbc7 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -28,7 +28,7 @@ lazy_static = "1.4.0" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" @@ -64,3 +64,19 @@ nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f1388554 paste = "1.0.6" proptest = "1.0.0" proptest-derive = "0.3.0" + +[features] +# all specialized tokenizations +default = ["milli/default"] + +# chinese specialized tokenization +chinese = ["milli/chinese"] + +# hebrew specialized tokenization +hebrew = ["milli/hebrew"] + +# japanese specialized tokenization +japanese = ["milli/japanese"] + +# thai specialized tokenization +thai = ["milli/thai"]