update charabia

This commit is contained in:
ManyTheFish 2023-10-26 17:01:10 +02:00
parent ca52021079
commit 4c6fddb1cb
4 changed files with 761 additions and 32 deletions

782
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
japanese = ["milli/japanese"] japanese = ["milli/japanese"]
# thai specialized tokenization # thai specialized tokenization
thai = ["milli/thai"] thai = ["milli/thai"]
# allow greek specialized tokenization # allow greek specialized tokenization
greek = ["milli/greek"] greek = ["milli/greek"]
# allow khmer specialized tokenization
khmer = ["milli/khmer"]

View File

@@ -150,6 +150,7 @@ hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"] japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"] thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"] greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
[package.metadata.mini-dashboard] [package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip" assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"

View File

@@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.4.0" bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] } bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3" byteorder = "1.4.3"
charabia = { version = "0.8.3", default-features = false } charabia = { version = "0.8.5", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8" crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]} deserr = { version = "0.6.0", features = ["actix-web"]}
@@ -82,7 +82,7 @@ md5 = "0.7.0"
rand = { version = "0.8.5", features = ["small_rng"] } rand = { version = "0.8.5", features = ["small_rng"] }
[features] [features]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"] all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
# Use POSIX semaphores instead of SysV semaphores in LMDB # Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml # For more information on this feature, see heed's Cargo.toml
@@ -106,3 +106,6 @@ thai = ["charabia/thai"]
# allow greek specialized tokenization # allow greek specialized tokenization
greek = ["charabia/greek"] greek = ["charabia/greek"]
# allow khmer specialized tokenization
khmer = ["charabia/khmer"]