From 935f18efcfbad0196eb615bebfe7a71b7ea76bd4 Mon Sep 17 00:00:00 2001
From: Jakub Jirutka
Date: Wed, 14 Sep 2022 20:57:13 +0200
Subject: [PATCH] Allow building without specialized tokenizations

(Some of) these specialized tokenizations include huge dictionaries that
currently account for 90% (!) of the meilisearch binary size.

This commit adds chinese, hebrew, japanese, and thai feature flags that
are propagated via milli down to the charabia crate. To keep it backward
compatible, they are enabled by default.

Related to meilisearch/milli#632
---
 Reviewer notes (this section is not part of the commit message):

 * Usage: meilisearch-http's default feature set pulls in
   "meilisearch-lib/default", which in turn enables "milli/default".
   To leave the specialized tokenizations out, build meilisearch-http
   with `--no-default-features` and list the wanted features explicitly,
   e.g. `--features analytics,mini-dashboard,japanese`.
 * NOTE(review): the feature names are forwarded to milli as
   "milli/chinese", "milli/hebrew", "milli/japanese" and "milli/thai";
   confirm that the pinned milli tag (v0.33.4) exposes them.
 * NOTE(review): the From: header in this copy carries no e-mail
   address; `git am` may refuse it. Restore the address or apply with
   `git apply` / `patch -p1`.
 * NOTE(review): the four-space indentation of the "tempfile"/"zip"
   context lines in the -104 hunk is assumed; verify against the
   upstream file or apply with whitespace-insensitive matching.

 meilisearch-auth/Cargo.toml |  2 +-
 meilisearch-http/Cargo.toml |  8 ++++++--
 meilisearch-lib/Cargo.toml  | 18 +++++++++++++++++-
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml
index 470d5b8d1..3bbc09c4a 100644
--- a/meilisearch-auth/Cargo.toml
+++ b/meilisearch-auth/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2021"
 enum-iterator = "0.7.0"
 hmac = "0.12.1"
 meilisearch-types = { path = "../meilisearch-types" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
 rand = "0.8.4"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = { version = "1.0.85", features = ["preserve_order"] }
diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml
index 38f9a83fc..baea8b578 100644
--- a/meilisearch-http/Cargo.toml
+++ b/meilisearch-http/Cargo.toml
@@ -46,7 +46,7 @@ jsonwebtoken = "8.0.1"
 log = "0.4.14"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-meilisearch-lib = { path = "../meilisearch-lib" }
+meilisearch-lib = { path = "../meilisearch-lib", default-features = false }
 mimalloc = { version = "0.1.29", default-features = false }
 mime = "0.3.16"
 num_cpus = "1.13.1"
@@ -90,7 +90,7 @@ urlencoding = "2.1.0"
 yaup = "0.2.0"
 
 [features]
-default = ["analytics", "mini-dashboard"]
+default = ["analytics", "meilisearch-lib/default", "mini-dashboard"]
 metrics = ["prometheus"]
 analytics = ["segment"]
 mini-dashboard = [
@@ -104,6 +104,10 @@ mini-dashboard = [
     "tempfile",
     "zip",
 ]
+chinese = ["meilisearch-lib/chinese"]
+hebrew = ["meilisearch-lib/hebrew"]
+japanese = ["meilisearch-lib/japanese"]
+thai = ["meilisearch-lib/thai"]
 
 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.1/build.zip"
diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml
index de967286c..bda3ecbc7 100644
--- a/meilisearch-lib/Cargo.toml
+++ b/meilisearch-lib/Cargo.toml
@@ -28,7 +28,7 @@ lazy_static = "1.4.0"
 log = "0.4.14"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
 mime = "0.3.16"
 num_cpus = "1.13.1"
 obkv = "0.2.0"
@@ -64,3 +64,19 @@ nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f1388554
 paste = "1.0.6"
 proptest = "1.0.0"
 proptest-derive = "0.3.0"
+
+[features]
+# all specialized tokenizations
+default = ["milli/default"]
+
+# chinese specialized tokenization
+chinese = ["milli/chinese"]
+
+# hebrew specialized tokenization
+hebrew = ["milli/hebrew"]
+
+# japanese specialized tokenization
+japanese = ["milli/japanese"]
+
+# thai specialized tokenization
+thai = ["milli/thai"]