3687: Allow to disable specialized tokenizations (again) r=Kerollmops a=jirutka

In PR #2773, I added the `chinese`, `hebrew`, `japanese` and `thai` feature flags to allow meilisearch to be built without the huge specialized tokenizations that took up 90% of the meilisearch binary size. Unfortunately, due to some recent changes, this doesn't work anymore. The problem lies in excessive use of the `default` feature flag, which infects the dependency graph.

Instead of adding `default-features = false` here and there, it's easier and more future-proof to not declare `default` in `milli` and `meilisearch-types`. I've renamed it to `all-tokenizations`, which also makes it a bit clearer what it's about.


Co-authored-by: Jakub Jirutka <jakub@jirutka.cz>
This commit is contained in:
meili-bors[bot] 2023-05-04 14:48:01 +00:00 committed by GitHub
commit 9f7981df28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 12 additions and 10 deletions

View File

@ -13,7 +13,7 @@ license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.70" anyhow = "1.0.70"
csv = "1.2.1" csv = "1.2.1"
milli = { path = "../milli", default-features = false } milli = { path = "../milli" }
mimalloc = { version = "0.1.36", default-features = false } mimalloc = { version = "0.1.36", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }
@ -31,7 +31,7 @@ flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false } reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
[features] [features]
default = ["milli/default"] default = ["milli/all-tokenizations"]
[[bench]] [[bench]]
name = "search_songs" name = "search_songs"

View File

@ -22,7 +22,7 @@ file-store = { path = "../file-store" }
flate2 = "1.0.25" flate2 = "1.0.25"
fst = "0.4.7" fst = "0.4.7"
memmap2 = "0.5.10" memmap2 = "0.5.10"
milli = { path = "../milli", default-features = false } milli = { path = "../milli" }
roaring = { version = "0.10.1", features = ["serde"] } roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4" serde-cs = "0.2.4"
@ -40,7 +40,7 @@ meili-snap = { path = "../meili-snap" }
[features] [features]
# all specialized tokenizations # all specialized tokenizations
default = ["milli/default"] all-tokenizations = ["milli/all-tokenizations"]
# chinese specialized tokenization # chinese specialized tokenization
chinese = ["milli/chinese"] chinese = ["milli/chinese"]

View File

@ -106,7 +106,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.4", optional = true } zip = { version = "0.6.4", optional = true }
[features] [features]
default = ["analytics", "meilisearch-types/default", "mini-dashboard"] default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"] analytics = ["segment"]
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"] mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
chinese = ["meilisearch-types/chinese"] chinese = ["meilisearch-types/chinese"]

View File

@ -69,7 +69,7 @@ rand = {version = "0.8.5", features = ["small_rng"] }
fuzzcheck = "0.12.1" fuzzcheck = "0.12.1"
[features] [features]
default = [ "charabia/default" ] all-tokenizations = [ "charabia/default" ]
# Use POSIX semaphores instead of SysV semaphores in LMDB # Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml # For more information on this feature, see heed's Cargo.toml

View File

@ -201,12 +201,14 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
#[allow(unused_imports)]
use super::*; use super::*;
use crate::index::tests::TempIndex;
#[cfg(feature = "default")] #[cfg(feature = "japanese")]
#[test] #[test]
fn test_kanji_language_detection() { fn test_kanji_language_detection() {
use crate::index::tests::TempIndex;
let index = TempIndex::new(); let index = TempIndex::new();
index index

View File

@ -4,7 +4,7 @@ pub mod distinct;
pub mod exactness; pub mod exactness;
pub mod geo_sort; pub mod geo_sort;
pub mod integration; pub mod integration;
#[cfg(feature = "default")] #[cfg(feature = "all-tokenizations")]
pub mod language; pub mod language;
pub mod ngram_split_words; pub mod ngram_split_words;
pub mod proximity; pub mod proximity;

View File

@ -1581,7 +1581,7 @@ mod tests {
assert_eq!(count, 4); assert_eq!(count, 4);
} }
#[cfg(feature = "default")] #[cfg(feature = "chinese")]
#[test] #[test]
fn test_meilisearch_1714() { fn test_meilisearch_1714() {
let index = TempIndex::new(); let index = TempIndex::new();