2021-02-12 23:15:09 +08:00
|
|
|
[package]
|
|
|
|
name = "milli"
|
|
|
|
edition = "2018"
|
2023-02-15 20:51:07 +08:00
|
|
|
publish = false
|
|
|
|
|
|
|
|
version.workspace = true
|
|
|
|
authors.workspace = true
|
|
|
|
description.workspace = true
|
|
|
|
homepage.workspace = true
|
|
|
|
readme.workspace = true
|
|
|
|
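# Kept disabled: `edition` is set explicitly to "2018" above, and enabling
# this line as well would define the `edition` key twice.
# edition.workspace = true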
|
|
|
|
license.workspace = true
|
2021-02-12 23:15:09 +08:00
|
|
|
|
|
|
|
[dependencies]
|
2023-04-11 21:23:51 +08:00
|
|
|
bimap = { version = "0.6.3", features = ["serde"] }
|
2021-08-31 17:44:15 +08:00
|
|
|
bincode = "1.3.3"
|
2023-04-11 21:23:51 +08:00
|
|
|
bstr = "1.4.0"
|
2023-06-08 18:19:06 +08:00
|
|
|
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
2022-03-15 00:00:53 +08:00
|
|
|
byteorder = "1.4.3"
|
2023-10-26 23:01:10 +08:00
|
|
|
charabia = { version = "0.8.5", default-features = false }
|
2021-07-06 17:31:24 +08:00
|
|
|
concat-arrays = "0.1.2"
|
2023-04-11 21:23:51 +08:00
|
|
|
crossbeam-channel = "0.5.8"
|
2023-11-29 00:19:57 +08:00
|
|
|
deserr = "0.6.0"
|
2023-07-03 23:10:05 +08:00
|
|
|
either = { version = "1.8.1", features = ["serde"] }
|
2022-06-02 21:46:44 +08:00
|
|
|
flatten-serde-json = { path = "../flatten-serde-json" }
|
2022-03-15 00:00:53 +08:00
|
|
|
fst = "0.4.7"
|
2021-02-12 23:15:09 +08:00
|
|
|
fxhash = "0.2.1"
|
2022-10-04 17:29:39 +08:00
|
|
|
geoutils = "0.5.1"
|
2023-11-01 20:55:18 +08:00
|
|
|
# Default features are turned off; only the features listed here are enabled.
grenad = { version = "0.4.5", default-features = false, features = [
    "rayon",
    "tempfile",
] }
|
2023-11-27 18:52:22 +08:00
|
|
|
# Default features are turned off; only the features listed here are enabled.
# The `lmdb-posix-sem` feature of this crate forwards to `heed/posix-sem`.
heed = { version = "0.20.0-alpha.9", default-features = false, features = [
    "serde-json",
    "serde-bincode",
    "read-txn-no-tls",
] }
|
2023-08-09 18:29:51 +08:00
|
|
|
indexmap = { version = "2.0.0", features = ["serde"] }
|
2023-07-25 18:36:01 +08:00
|
|
|
instant-distance = { version = "0.6.1", features = ["with-serde"] }
|
2022-04-12 17:22:36 +08:00
|
|
|
json-depth-checker = { path = "../json-depth-checker" }
|
2022-03-15 00:00:53 +08:00
|
|
|
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
2023-08-09 18:02:13 +08:00
|
|
|
memmap2 = "0.7.1"
|
2021-07-06 17:31:24 +08:00
|
|
|
obkv = "0.2.0"
|
2023-04-11 21:23:51 +08:00
|
|
|
once_cell = "1.17.1"
|
|
|
|
ordered-float = "3.6.0"
|
2023-06-14 20:20:05 +08:00
|
|
|
rand_pcg = { version = "0.3.1", features = ["serde1"] }
|
2023-04-11 21:23:51 +08:00
|
|
|
rayon = "1.7.0"
|
2022-10-04 17:29:39 +08:00
|
|
|
roaring = "0.10.1"
|
2023-08-09 18:06:02 +08:00
|
|
|
rstar = { version = "0.11.0", features = ["serde"] }
|
2023-04-11 21:23:51 +08:00
|
|
|
serde = { version = "1.0.160", features = ["derive"] }
|
|
|
|
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
2022-03-15 00:00:53 +08:00
|
|
|
slice-group-by = "0.3.0"
|
2023-05-15 17:42:30 +08:00
|
|
|
smallstr = { version = "0.3.0", features = ["serde"] }
|
2022-10-04 17:29:39 +08:00
|
|
|
smallvec = "1.10.0"
|
2022-04-11 21:43:18 +08:00
|
|
|
smartstring = "1.0.1"
|
2023-04-11 21:23:51 +08:00
|
|
|
tempfile = "3.5.0"
|
|
|
|
thiserror = "1.0.40"
|
2023-05-15 17:42:30 +08:00
|
|
|
# Default features stay enabled; the features below are added on top of them.
time = { version = "0.3.20", features = [
    "serde-well-known",
    "formatting",
    "parsing",
    "macros",
] }
|
2023-04-11 21:23:51 +08:00
|
|
|
uuid = { version = "1.3.1", features = ["v4"] }
|
2021-02-12 23:15:09 +08:00
|
|
|
|
2021-11-09 23:16:28 +08:00
|
|
|
filter-parser = { path = "../filter-parser" }
|
2021-02-12 23:15:09 +08:00
|
|
|
|
|
|
|
# documents words self-join
|
2023-08-09 17:50:24 +08:00
|
|
|
itertools = "0.11.0"
|
2021-02-12 23:15:09 +08:00
|
|
|
|
2023-07-11 00:41:54 +08:00
|
|
|
# profiling
|
|
|
|
puffin = "0.16.0"
|
|
|
|
|
2021-02-12 23:15:09 +08:00
|
|
|
# logging
|
2022-10-04 17:29:39 +08:00
|
|
|
log = "0.4.17"
|
2022-03-15 00:00:53 +08:00
|
|
|
logging_timer = "1.1.0"
|
2023-04-11 21:23:51 +08:00
|
|
|
csv = "1.2.1"
|
2023-11-15 22:43:57 +08:00
|
|
|
# NOTE(review): candle-core and the two candle crates that follow are pulled
# from the same git repository without a `rev`, `tag` or `branch`, unlike
# `tokenizers`, which is pinned to a tag. Consider pinning them to a commit so
# the resolved source cannot drift away from version 0.3.1 — TODO confirm.
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
|
|
|
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
|
|
|
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
|
|
|
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1" }
|
|
|
|
hf-hub = "0.3.2"
|
2021-02-12 23:15:09 +08:00
|
|
|
|
|
|
|
[dev-dependencies]
|
2023-07-29 11:35:47 +08:00
|
|
|
mimalloc = { version = "0.1.37", default-features = false }
|
2021-03-11 18:48:55 +08:00
|
|
|
big_s = "1.0.2"
|
2023-04-11 21:23:51 +08:00
|
|
|
insta = "1.29.0"
|
2021-02-12 23:15:09 +08:00
|
|
|
maplit = "1.0.2"
|
2022-08-03 14:45:26 +08:00
|
|
|
md5 = "0.7.0"
|
2023-09-18 15:59:38 +08:00
|
|
|
meili-snap = { path = "../meili-snap" }
|
2023-05-15 17:42:30 +08:00
|
|
|
rand = { version = "0.8.5", features = ["small_rng"] }
|
2021-02-12 23:15:09 +08:00
|
|
|
|
|
|
|
[features]
|
2023-10-26 23:01:10 +08:00
|
|
|
# Enables all the language-specific charabia tokenizers at once.
# `japanese-transliteration` is not part of this set.
all-tokenizations = [
    "charabia/chinese",
    "charabia/hebrew",
    "charabia/japanese",
    "charabia/thai",
    "charabia/korean",
    "charabia/greek",
    "charabia/khmer",
]
|
2022-09-08 18:19:44 +08:00
|
|
|
|
2023-01-20 01:08:38 +08:00
|
|
|
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
|
|
|
# For more information on this feature, see heed's Cargo.toml
|
|
|
|
lmdb-posix-sem = ["heed/posix-sem"]
|
|
|
|
|
2022-09-08 18:19:44 +08:00
|
|
|
# Enable Chinese-specialized tokenization
|
|
|
|
chinese = ["charabia/chinese"]
|
|
|
|
|
|
|
|
# Enable Hebrew-specialized tokenization
|
|
|
|
hebrew = ["charabia/hebrew"]
|
|
|
|
|
|
|
|
# Enable Japanese-specialized tokenization
|
|
|
|
japanese = ["charabia/japanese"]
|
2022-12-12 21:53:08 +08:00
|
|
|
# Forwards to charabia's `japanese-transliteration` feature.
# Not included in `all-tokenizations`; it has to be enabled explicitly.
japanese-transliteration = ["charabia/japanese-transliteration"]
|
|
|
|
|
|
|
|
# Enable Korean-specialized tokenization
|
|
|
|
korean = ["charabia/korean"]
|
2022-09-08 18:19:44 +08:00
|
|
|
|
|
|
|
# Enable Thai-specialized tokenization
|
|
|
|
thai = ["charabia/thai"]
|
2023-04-26 20:56:54 +08:00
|
|
|
|
|
|
|
# Enable Greek-specialized tokenization
|
2023-04-26 20:58:32 +08:00
|
|
|
greek = ["charabia/greek"]
|
2023-10-26 23:01:10 +08:00
|
|
|
|
|
|
|
# Enable Khmer-specialized tokenization
|
|
|
|
khmer = ["charabia/khmer"]
|