From 8d93de28b87dd1996ce7d772847dc6737cc65893 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 26 Feb 2025 22:11:16 +0100 Subject: [PATCH] Add thread pool to cargo toml --- Cargo.lock | 8 +++ crates/milli/Cargo.toml | 131 ++++++++++++++++++++-------------------- 2 files changed, 74 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6a42ffa26..660972be5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3814,6 +3814,7 @@ dependencies = [ "roaring", "rstar", "rustc-hash 2.1.0", + "scoped_thread_pool", "serde", "serde_json", "slice-group-by", @@ -5088,6 +5089,13 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scoped_thread_pool" +version = "0.1.0" +dependencies = [ + "crossbeam-channel", +] + [[package]] name = "scopeguard" version = "1.2.0" diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index 5eb89ea53..f0a003d4d 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -1,120 +1,121 @@ [package] -name = "milli" edition = "2021" +name = "milli" publish = false -version.workspace = true authors.workspace = true description.workspace = true homepage.workspace = true readme.workspace = true +version.workspace = true # edition.workspace = true license.workspace = true [dependencies] big_s = "1.0.2" -bimap = { version = "0.6.3", features = ["serde"] } +bimap = {version = "0.6.3", features = ["serde"]} bincode = "1.3.3" bstr = "1.11.3" -bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] } +bytemuck = {version = "1.21.0", features = ["extern_crate_alloc"]} byteorder = "1.5.0" -charabia = { version = "0.9.2", default-features = false } +charabia = {version = "0.9.2", default-features = false} concat-arrays = "0.1.2" convert_case = "0.6.0" crossbeam-channel = "0.5.14" deserr = "0.6.3" -either = { version = "1.13.0", features = ["serde"] } -flatten-serde-json = { path = "../flatten-serde-json" } +either = {version = "1.13.0", features = ["serde"]} +flatten-serde-json = {path = "../flatten-serde-json"} fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" -grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] } -heed = { version = "0.20.5", default-features = false, features = [ - "serde-json", - "serde-bincode", - "read-txn-no-tls", -] } -indexmap = { version = "2.7.0", features = ["serde"] } -json-depth-checker = { path = "../json-depth-checker" } -levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } +grenad = {version = "0.5.0", default-features = false, features = ["rayon", "tempfile"]} +heed = {version = "0.20.5", default-features = false, features = [ + "serde-json", + "serde-bincode", + "read-txn-no-tls", +]} +indexmap = {version = "2.7.0", features = ["serde"]} +json-depth-checker = {path = "../json-depth-checker"} +levenshtein_automata = {version = "0.2.1", features = ["fst_automaton"]} memchr = "2.7.4" memmap2 = "0.9.5" obkv = "0.3.0" once_cell = "1.20.2" ordered-float = "4.6.0" rayon = "1.10.0" -roaring = { version = "0.10.10", features = ["serde"] } -rstar = { version = "0.12.2", features = ["serde"] } -serde = { version = "1.0.217", features = ["derive"] } -serde_json = { version = "1.0.135", features = ["preserve_order", "raw_value"] } +roaring = {version = "0.10.10", features = ["serde"]} +rstar = {version = "0.12.2", features = ["serde"]} +serde = {version = "1.0.217", features = ["derive"]} +serde_json = {version = "1.0.135", features = ["preserve_order", "raw_value"]} slice-group-by = "0.3.1" -smallstr = { version = "0.3.0", features = ["serde"] } +smallstr = {version = "0.3.0", features = ["serde"]} smallvec = "1.13.2" smartstring = "1.0.1" tempfile = "3.15.0" thiserror = "2.0.9" -time = { version = "0.3.37", features = [ - "serde-well-known", - "formatting", - "parsing", - "macros", -] } -uuid = { version = "1.11.0", features = ["v4"] } +time = {version = "0.3.37", features = [ + "serde-well-known", + "formatting", + "parsing", + "macros", +]} +uuid = {version = "1.11.0", features = ["v4"]} -filter-parser = { path = "../filter-parser" } +filter-parser = {path = "../filter-parser"} +scoped_thread_pool = {path = "/home/dureuill/dev/scoped_thread_pool"} # documents words self-join itertools = "0.14.0" -csv = "1.3.1" -candle-core = { version = "0.8.2" } -candle-transformers = { version = "0.8.2" } -candle-nn = { version = "0.8.2" } -tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [ - "onig", -] } -hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [ - "online", -] } -tiktoken-rs = "0.6.0" -liquid = "0.26.9" -rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [ - "serde", - "no_module", - "no_custom_syntax", - "no_time", - "sync", -] } +allocator-api2 = "0.2.21" arroy = "0.5.0" -rand = "0.8.5" -tracing = "0.1.41" -ureq = { version = "2.12.1", features = ["json"] } -url = "2.5.4" -rayon-par-bridge = "0.1.0" -hashbrown = "0.15.2" +bbqueue = {git = "https://github.com/meilisearch/bbqueue"} bumpalo = "3.16.0" bumparaw-collections = "0.1.4" -thread_local = "1.1.8" -allocator-api2 = "0.2.21" -rustc-hash = "2.1.0" -uell = "0.1.0" +candle-core = {version = "0.8.2"} +candle-nn = {version = "0.8.2"} +candle-transformers = {version = "0.8.2"} +csv = "1.3.1" enum-iterator = "2.1.0" -bbqueue = { git = "https://github.com/meilisearch/bbqueue" } -flume = { version = "0.11.1", default-features = false } -utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] } +flume = {version = "0.11.1", default-features = false} +hashbrown = "0.15.2" +hf-hub = {git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [ + "online", +]} +liquid = "0.26.9" +rand = "0.8.5" +rayon-par-bridge = "0.1.0" +rhai = {git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [ + "serde", + "no_module", + "no_custom_syntax", + "no_time", + "sync", +]} +rustc-hash = "2.1.0" +thread_local = "1.1.8" +tiktoken-rs = "0.6.0" +tokenizers = {git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [ + "onig", +]} +tracing = "0.1.41" +uell = "0.1.0" +ureq = {version = "2.12.1", features = ["json"]} +url = "2.5.4" +utoipa = {version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"]} [dev-dependencies] -mimalloc = { version = "0.1.43", default-features = false } +mimalloc = {version = "0.1.43", default-features = false} # fixed version due to format breakages in v1.40 insta = "=1.39.0" maplit = "1.0.2" md5 = "0.7.0" -meili-snap = { path = "../meili-snap" } -rand = { version = "0.8.5", features = ["small_rng"] } +meili-snap = {path = "../meili-snap"} +rand = {version = "0.8.5", features = ["small_rng"]} [features] all-tokenizations = [ - "charabia/default", + "charabia/default", ] # Use POSIX semaphores instead of SysV semaphores in LMDB