[package]
name = "milli"
version.workspace = true
authors.workspace = true
# The edition is set literally in this manifest; the workspace-inherited
# form is kept below, disabled.
edition = "2021"
# edition.workspace = true
homepage.workspace = true
license.workspace = true
publish = false
readme.workspace = true
description.workspace = true

[dependencies]
# Crates referenced by relative filesystem path.
filter-parser = { path = "../filter-parser" }
flatten-serde-json = { path = "../flatten-serde-json" }
json-depth-checker = { path = "../json-depth-checker" }

# NOTE(review): this is an absolute path into a single user's home directory,
# so it only resolves on a machine where that directory exists. Confirm the
# intended source (relative path, git repository or registry version).
scoped_thread_pool = { path = "/home/dureuill/dev/scoped_thread_pool" }

# Crates fetched from git repositories.
# NOTE(review): bbqueue specifies no rev, tag or branch; consider pinning one.
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
  "online",
] }
rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [
  "serde",
  "no_module",
  "no_custom_syntax",
  "no_time",
  "sync",
] }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
  "onig",
] }

# Crates specified by version requirement only, in alphabetical order.
allocator-api2 = "0.2.21"
arroy = "0.5.0"
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.11.3"
bumpalo = "3.16.0"
bumparaw-collections = "0.1.4"
bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
candle-core = "0.8.2"
candle-nn = "0.8.2"
candle-transformers = "0.8.2"
charabia = { version = "0.9.2", default-features = false }
concat-arrays = "0.1.2"
convert_case = "0.6.0"
crossbeam-channel = "0.5.14"
csv = "1.3.1"
deserr = "0.6.3"
either = { version = "1.13.0", features = ["serde"] }
enum-iterator = "2.1.0"
flume = { version = "0.11.1", default-features = false }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] }
hashbrown = "0.15.2"
heed = { version = "0.20.5", default-features = false, features = [
  "serde-json",
  "serde-bincode",
  "read-txn-no-tls",
] }
indexmap = { version = "2.7.0", features = ["serde"] }
# documents words self-join
itertools = "0.14.0"
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
liquid = "0.26.9"
memchr = "2.7.4"
memmap2 = "0.9.5"
obkv = "0.3.0"
once_cell = "1.20.2"
ordered-float = "4.6.0"
rand = "0.8.5"
rayon = "1.10.0"
rayon-par-bridge = "0.1.0"
roaring = { version = "0.10.10", features = ["serde"] }
rstar = { version = "0.12.2", features = ["serde"] }
rustc-hash = "2.1.0"
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order", "raw_value"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2"
smartstring = "1.0.1"
tempfile = "3.15.0"
thiserror = "2.0.9"
thread_local = "1.1.8"
tiktoken-rs = "0.6.0"
time = { version = "0.3.37", features = [
  "serde-well-known",
  "formatting",
  "parsing",
  "macros",
] }
tracing = "0.1.41"
uell = "0.1.0"
ureq = { version = "2.12.1", features = ["json"] }
url = "2.5.4"
utoipa = { version = "5.3.1", features = [
  "non_strict_integers",
  "preserve_order",
  "uuid",
  "time",
  "openapi_extensions",
] }

[dev-dependencies]
# Pinned to an exact version due to format breakages in v1.40.
insta = "=1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
mimalloc = { version = "0.1.43", default-features = false }
# Same version as the regular dependency, with `small_rng` added.
rand = { version = "0.8.5", features = ["small_rng"] }

[features]
# Turn on charabia's `default` feature set.
all-tokenizations = ["charabia/default"]

# Use POSIX semaphores instead of SysV semaphores in LMDB.
# For more information on this feature, see heed's Cargo.toml.
lmdb-posix-sem = ["heed/posix-sem"]

# Allow Chinese specialized tokenization; `chinese-pinyin` also enables `chinese`.
chinese = ["charabia/chinese"]
chinese-pinyin = ["chinese", "charabia/chinese-normalization-pinyin"]

# Allow Hebrew specialized tokenization.
hebrew = ["charabia/hebrew"]

# Allow Japanese specialized tokenization.
japanese = ["charabia/japanese"]
japanese-transliteration = ["charabia/japanese-transliteration"]

# Allow Korean specialized tokenization.
korean = ["charabia/korean"]

# Allow Thai specialized tokenization.
thai = ["charabia/thai"]

# Allow Greek specialized tokenization.
greek = ["charabia/greek"]

# Allow Khmer specialized tokenization.
khmer = ["charabia/khmer"]

# Allow Vietnamese specialized tokenization.
vietnamese = ["charabia/vietnamese"]

# Allow German specialized tokenization.
german = ["charabia/german-segmentation"]

# Force Swedish character recomposition.
swedish-recomposition = ["charabia/swedish-recomposition"]

# Allow Turkish specialized tokenization.
turkish = ["charabia/turkish"]

# Allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>.
cuda = ["candle-core/cuda"]