# Package metadata for the `milli` crate. Keys written as `<key>.workspace = true`
# are inherited from the workspace manifest.
[package]
name = "milli"
version.workspace = true
authors.workspace = true
# The edition is set explicitly; workspace inheritance is left commented out:
# edition.workspace = true
edition = "2021"
homepage.workspace = true
license.workspace = true
publish = false
readme.workspace = true
description.workspace = true

# Runtime dependencies, kept in alphabetical order.
# Entries with `path` are sibling crates in this repository; entries with `git`
# are fetched from the given repository at the stated branch, tag or rev.
[dependencies]
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
candle-core = "0.6.0"
candle-nn = "0.6.0"
candle-transformers = "0.6.0"
# charabia is taken from a git branch; the registry form is kept for reference:
# charabia = { version = "0.9.0", default-features = false }
charabia = { git = "https://github.com/meilisearch/charabia", branch = "mutualize-char-normalizer", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
csv = "1.3.0"
deserr = "0.6.2"
either = { version = "1.13.0", features = ["serde"] }
filter-parser = { path = "../filter-parser" }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { git = "https://github.com/meilisearch/grenad", branch = "various-improvements", version = "0.4.7", default-features = false, features = [
    "rayon", # TODO Should we keep this feature
    "tempfile",
] }
hashbrown = "0.14.5"
heed = { version = "0.20.3", default-features = false, features = [
    "read-txn-no-tls",
    "serde-bincode",
    "serde-json",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = ["online"] }
indexmap = { version = "2.2.6", features = ["serde"] }
itertools = "0.13.0" # documents words self-join
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
liquid = "0.26.6"
lru = "0.12.3"
memchr = "2.5.0"
memmap2 = "0.9.4"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0"
ordered-float = "4.2.1"
rand = "0.8.5"
rayon = "1.10.0"
rayon-par-bridge = "0.1.0"
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2"
smartstring = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
tiktoken-rs = "0.5.9"
time = { version = "0.3.36", features = [
    "formatting",
    "macros",
    "parsing",
    "serde-well-known",
] }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = ["onig"] }
tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
uuid = { version = "1.10.0", features = ["v4"] }

# Dependencies used only by tests, examples and benchmarks, in alphabetical order.
[dev-dependencies]
insta = "1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
mimalloc = { version = "0.1.43", default-features = false }
# Same version as the `rand` entry in [dependencies], with `small_rng` added.
rand = { version = "0.8.5", features = ["small_rng"] }

# Cargo features. Apart from `lmdb-posix-sem` and `cuda`, each one forwards to
# one or more features of the `charabia` tokenizer dependency.
[features]
# Turn on every charabia feature listed below in one go.
# `charabia/chinese-normalization-pinyin` and `charabia/japanese-transliteration`
# are not part of this list; use `chinese-pinyin` / `japanese-transliteration`.
all-tokenizations = [
    "charabia/chinese",
    "charabia/german-segmentation",
    "charabia/greek",
    "charabia/hebrew",
    "charabia/japanese",
    "charabia/khmer",
    "charabia/korean",
    "charabia/swedish-recomposition",
    "charabia/thai",
    "charabia/vietnamese",
]

# Make LMDB use POSIX semaphores rather than SysV semaphores.
# heed's Cargo.toml has more information on this feature.
lmdb-posix-sem = ["heed/posix-sem"]

# CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]

# Per-language tokenization switches, in alphabetical order.

# Chinese specialized tokenization
chinese = ["charabia/chinese"]
# Chinese tokenization plus charabia's pinyin normalization feature
chinese-pinyin = ["chinese", "charabia/chinese-normalization-pinyin"]

# German specialized tokenization
german = ["charabia/german-segmentation"]

# Greek specialized tokenization
greek = ["charabia/greek"]

# Hebrew specialized tokenization
hebrew = ["charabia/hebrew"]

# Japanese specialized tokenization
japanese = ["charabia/japanese"]
# Forwards to charabia's `japanese-transliteration` feature
japanese-transliteration = ["charabia/japanese-transliteration"]

# Khmer specialized tokenization
khmer = ["charabia/khmer"]

# Korean specialized tokenization
korean = ["charabia/korean"]

# Force Swedish character recomposition
swedish-recomposition = ["charabia/swedish-recomposition"]

# Thai specialized tokenization
thai = ["charabia/thai"]

# Vietnamese specialized tokenization
vietnamese = ["charabia/vietnamese"]