Update Charabia to 0.7.1

This commit is contained in:
ManyTheFish 2023-02-20 14:00:31 +01:00
parent 0bc1a18f52
commit cb8d5f2d4b
2 changed files with 91 additions and 86 deletions

174
Cargo.lock generated
View File

@ -523,12 +523,6 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "build_const"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7"
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.11.1" version = "3.11.1"
@ -665,16 +659,19 @@ dependencies = [
[[package]] [[package]]
name = "charabia" name = "charabia"
version = "0.7.0" version = "0.7.1"
source = "git+https://github.com/meilisearch/charabia?branch=fix-script-lang-serialization#c5efba56d433ff783e162009e020baba322afde0" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad3d9667a6b4e03813162c22c4d58235c2dc25d580d60837ce29199038341c9"
dependencies = [ dependencies = [
"cow-utils", "cow-utils",
"csv", "csv",
"deunicode", "deunicode",
"fst", "fst",
"irg-kvariants",
"jieba-rs", "jieba-rs",
"kvariants",
"lindera", "lindera",
"lindera-ipadic",
"lindera-ko-dic",
"once_cell", "once_cell",
"pinyin", "pinyin",
"serde", "serde",
@ -727,14 +724,9 @@ version = "3.2.23"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5"
dependencies = [ dependencies = [
"atty",
"bitflags", "bitflags",
"clap_derive 3.2.18",
"clap_lex 0.2.4", "clap_lex 0.2.4",
"indexmap", "indexmap",
"once_cell",
"strsim",
"termcolor",
"textwrap", "textwrap",
] ]
@ -745,7 +737,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39" checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"clap_derive 4.0.21", "clap_derive",
"clap_lex 0.3.0", "clap_lex 0.3.0",
"is-terminal", "is-terminal",
"once_cell", "once_cell",
@ -753,19 +745,6 @@ dependencies = [
"termcolor", "termcolor",
] ]
[[package]]
name = "clap_derive"
version = "3.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2 1.0.49",
"quote 1.0.23",
"syn 1.0.107",
]
[[package]] [[package]]
name = "clap_derive" name = "clap_derive"
version = "4.0.21" version = "4.0.21"
@ -879,15 +858,6 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "crc"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
dependencies = [
"build_const",
]
[[package]] [[package]]
name = "crc32fast" name = "crc32fast"
version = "1.3.2" version = "1.3.2"
@ -1333,6 +1303,19 @@ dependencies = [
"termcolor", "termcolor",
] ]
[[package]]
name = "env_logger"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0"
dependencies = [
"humantime",
"is-terminal",
"log",
"regex",
"termcolor",
]
[[package]] [[package]]
name = "errno" name = "errno"
version = "0.2.8" version = "0.2.8"
@ -1986,6 +1969,17 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146"
[[package]]
name = "irg-kvariants"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c73214298363629cf9dbfc93b426808865ee3c121029778cb31b1284104fdf78"
dependencies = [
"csv",
"once_cell",
"serde",
]
[[package]] [[package]]
name = "is-terminal" name = "is-terminal"
version = "0.4.2" version = "0.4.2"
@ -2075,13 +2069,12 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kvariants" name = "kanaria"
version = "0.1.0" version = "0.2.0"
source = "git+https://github.com/meilisearch/charabia?branch=fix-script-lang-serialization#c5efba56d433ff783e162009e020baba322afde0" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0f9d9652540055ac4fded998a73aca97d965899077ab1212587437da44196ff"
dependencies = [ dependencies = [
"csv", "bitflags",
"once_cell",
"serde",
] ]
[[package]] [[package]]
@ -2153,14 +2146,15 @@ dependencies = [
[[package]] [[package]]
name = "lindera" name = "lindera"
version = "0.17.0" version = "0.21.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "082ca91ac4d1557028ace9bfb8cee1500d156a4574dda93cfcdcf4caaebb9bd7" checksum = "0f33a20bb9cbf95572b2d2f40d7040c8d8c7ad09ae20e1f6513db6ef2564dfc5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
"byteorder", "byteorder",
"encoding", "encoding",
"kanaria",
"lindera-cc-cedict-builder", "lindera-cc-cedict-builder",
"lindera-core", "lindera-core",
"lindera-dictionary", "lindera-dictionary",
@ -2169,24 +2163,27 @@ dependencies = [
"lindera-ko-dic", "lindera-ko-dic",
"lindera-ko-dic-builder", "lindera-ko-dic-builder",
"lindera-unidic-builder", "lindera-unidic-builder",
"regex",
"serde", "serde",
"serde_json", "serde_json",
"thiserror", "thiserror",
"unicode-blocks",
"unicode-normalization",
"yada",
] ]
[[package]] [[package]]
name = "lindera-cc-cedict-builder" name = "lindera-cc-cedict-builder"
version = "0.17.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8967615a6d85320ec2755e1435c36165467ba01a79026adc3f86dad1b668df3" checksum = "60c3b379251edadbac7a5fdb31e482274e11dae6ab6cc789d0d86cf34369cf49"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
"byteorder", "byteorder",
"clap 3.2.23",
"csv", "csv",
"encoding", "encoding",
"env_logger", "env_logger 0.10.0",
"glob", "glob",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
@ -2195,16 +2192,28 @@ dependencies = [
] ]
[[package]] [[package]]
name = "lindera-core" name = "lindera-compress"
version = "0.17.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e8ed3cea13f73557a4574a179b1518670a3b70bfdad120521313b03cc89380e" checksum = "a8d0ea3de5625e2381cac94e518d3b56103fde56bc0dce840fe875c1e871b125"
dependencies = [
"anyhow",
"flate2",
"lindera-decompress",
]
[[package]]
name = "lindera-core"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2281747b98fdd46bcc54ce7fdb6870dad9f67ddb3dc086c47b6704f3e1178cd5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
"byteorder", "byteorder",
"encoding_rs", "encoding_rs",
"log", "log",
"once_cell",
"serde", "serde",
"thiserror", "thiserror",
"yada", "yada",
@ -2212,20 +2221,20 @@ dependencies = [
[[package]] [[package]]
name = "lindera-decompress" name = "lindera-decompress"
version = "0.17.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2badb41828f89cfa6452db0a66da77897c0a04478304de26c8b2b36613e08d43" checksum = "52101bd454754c506305ab897af5ac2ae41fe91e3272c1ff5c6a02a089dfaefd"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"lzma-rs", "flate2",
"serde", "serde",
] ]
[[package]] [[package]]
name = "lindera-dictionary" name = "lindera-dictionary"
version = "0.17.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e219722c9f56b920c231210e7c25d8b5d35b508e7a2fd69d368916c4b1c926f6" checksum = "af1c6668848f1d30d216c99093a3ed3fe125c105fa12a4aeed5a1861dc01dd52"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@ -2235,15 +2244,16 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic" name = "lindera-ipadic"
version = "0.17.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c8e87c8362c724e8188fb7d9b6d184cac15d01369295e9bff7812b630d57e3b" checksum = "693098007200fa43fd5cdc9ca8740f371327369672ce812cd87a1f6344971e31"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
"encoding", "encoding",
"flate2", "flate2",
"lindera-core", "lindera-core",
"lindera-decompress",
"lindera-ipadic-builder", "lindera-ipadic-builder",
"once_cell", "once_cell",
"tar", "tar",
@ -2251,19 +2261,19 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic-builder" name = "lindera-ipadic-builder"
version = "0.17.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1439e95852e444a116424086dc64d709c90e8af269ff7d2c2c4020f666f8dfab" checksum = "7b6b7240d097a8fc37ee8f90ebff02c4db0ba5325ecb0dacb6da3724596798c9"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
"byteorder", "byteorder",
"clap 3.2.23",
"csv", "csv",
"encoding_rs", "encoding_rs",
"encoding_rs_io", "encoding_rs_io",
"env_logger", "env_logger 0.10.0",
"glob", "glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"log", "log",
@ -2273,15 +2283,16 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ko-dic" name = "lindera-ko-dic"
version = "0.17.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb15f949220da45872d774b7831bb030855ec083435c907499782f8558c8a203" checksum = "abd3c5a4addeb61ca66788a3dd1fd51093e6cd8fea1d997042ada5aa60e8cc5e"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
"encoding", "encoding",
"flate2", "flate2",
"lindera-core", "lindera-core",
"lindera-decompress",
"lindera-ko-dic-builder", "lindera-ko-dic-builder",
"once_cell", "once_cell",
"tar", "tar",
@ -2289,18 +2300,18 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ko-dic-builder" name = "lindera-ko-dic-builder"
version = "0.17.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fde5a7352f4754be4f741e90bf4dff38a12a6572ab3880d0cf688e1166b8d82b" checksum = "512bb1393a9281e0b13704319d1343b7931416865852d9d7b7c0178431518326"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
"byteorder", "byteorder",
"clap 3.2.23",
"csv", "csv",
"encoding", "encoding",
"env_logger", "env_logger 0.10.0",
"glob", "glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"log", "log",
@ -2309,17 +2320,16 @@ dependencies = [
[[package]] [[package]]
name = "lindera-unidic-builder" name = "lindera-unidic-builder"
version = "0.17.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1451b2ed8a7184a5f815d84f99d358c1d67297305831453dfdc0eb5d08e22b5" checksum = "7f575a27f8ba67c15fe16ebf7d277a0ac04e8c8a0f72670ebc2443da9d41c450"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
"byteorder", "byteorder",
"clap 3.2.23",
"csv", "csv",
"encoding", "encoding",
"env_logger", "env_logger 0.10.0",
"glob", "glob",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
@ -2408,16 +2418,6 @@ dependencies = [
"syn 1.0.107", "syn 1.0.107",
] ]
[[package]]
name = "lzma-rs"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1"
dependencies = [
"byteorder",
"crc",
]
[[package]] [[package]]
name = "manifest-dir-macros" name = "manifest-dir-macros"
version = "0.1.16" version = "0.1.16"
@ -2475,7 +2475,7 @@ dependencies = [
"deserr", "deserr",
"dump", "dump",
"either", "either",
"env_logger", "env_logger 0.9.3",
"file-store", "file-store",
"flate2", "flate2",
"fst", "fst",
@ -4113,6 +4113,12 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
[[package]]
name = "unicode-blocks"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9de2be6bad6f56ce8373d377e611cbb2265de3a656138065609ce82e217aad70"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.6" version = "1.0.6"

View File

@ -9,8 +9,7 @@ bimap = { version = "0.6.2", features = ["serde"] }
bincode = "1.3.3" bincode = "1.3.3"
bstr = "1.0.1" bstr = "1.0.1"
byteorder = "1.4.3" byteorder = "1.4.3"
# charabia = { version = "0.7.0", default-features = false } charabia = { version = "0.7.1", default-features = false }
charabia = { git = "https://github.com/meilisearch/charabia", branch = "fix-script-lang-serialization", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
crossbeam-channel = "0.5.6" crossbeam-channel = "0.5.6"
deserr = "0.1.4" deserr = "0.1.4"