From 4c6fddb1cb2c5ca3b2931135d3e0b955a504247d Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 26 Oct 2023 17:01:10 +0200 Subject: [PATCH] update charabia --- Cargo.lock | 782 +++++++++++++++++++++++++++++++++-- meilisearch-types/Cargo.toml | 3 +- meilisearch/Cargo.toml | 1 + milli/Cargo.toml | 7 +- 4 files changed, 761 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d41708399..bee967723 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -558,7 +558,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05" dependencies = [ "memchr", - "regex-automata", + "regex-automata 0.3.6", "serde", ] @@ -646,6 +646,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "calendrical_calculations" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dfe3bc6a50b4667fafdb6d9cf26731c5418c457e317d8166c972014facf9a5d" +dependencies = [ + "core_maths", + "displaydoc", +] + [[package]] name = "cargo_toml" version = "0.15.3" @@ -699,9 +709,9 @@ dependencies = [ [[package]] name = "charabia" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "098219a776307414866165a03a9cc68c1578764fe3616fe979e1c280790ddd73" +checksum = "ffb924701d850fbf0331302e7f9715c04e494b4b9bebb38ac48bdd30924e1936" dependencies = [ "aho-corasick", "cow-utils", @@ -709,11 +719,15 @@ dependencies = [ "deunicode", "either", "fst", + "icu", + "icu_provider", + "icu_provider_blob", "irg-kvariants", "jieba-rs", "lindera-core", "lindera-dictionary", "lindera-tokenizer", + "litemap 0.6.1", "once_cell", "pinyin", "serde", @@ -721,6 +735,7 @@ dependencies = [ "unicode-normalization", "wana_kana", "whatlang", + "zerovec 0.9.6", ] [[package]] @@ -801,6 +816,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + [[package]] name = "colorchoice" version = "1.0.0" @@ -874,6 +895,15 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "core_maths" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b02505ccb8c50b0aa21ace0fc08c3e53adebd4e58caa18a36152803c7709a3" +dependencies = [ + "libm", +] + [[package]] name = "cow-utils" version = "0.1.2" @@ -1073,6 +1103,15 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "deduplicating_array" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a636096586ca093a10ac0175bfb384d024089dca0dae54e3e69bc1c1596358e8" +dependencies = [ + "serde", +] + [[package]] name = "deranged" version = "0.3.7" @@ -1204,6 +1243,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "displaydoc" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", +] + [[package]] name = "dump" version = "1.5.0" @@ -1238,6 +1288,12 @@ dependencies = [ "serde", ] +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + [[package]] name = "encode_unicode" version = "0.3.6" @@ -1447,6 +1503,17 @@ dependencies = [ "unescaper", ] +[[package]] +name = "fixed_decimal" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5287d527037d0f35c8801880361eb38bb9bce194805350052c2a79538388faeb" +dependencies = [ + "displaydoc", + "smallvec", + "writeable", +] + [[package]] name = "flate2" version = "1.0.26" @@ -1873,6 +1940,487 @@ dependencies = [ "tokio-rustls 0.24.1", ] +[[package]] +name = "icu" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30f75f394ebee8d539bef8f6f02ad7b5f41c33de74c9eae1a50337b382a5aab1" +dependencies = [ + "icu_calendar", + "icu_casemap", + "icu_collator", + "icu_collections", + "icu_compactdecimal", + "icu_datetime", + "icu_decimal", + "icu_displaynames", + "icu_list", + "icu_locid", + "icu_locid_transform", + "icu_normalizer", + "icu_plurals", + "icu_properties", + "icu_provider", + "icu_relativetime", + "icu_segmenter", + "icu_timezone", + "icu_transliterate", +] + +[[package]] +name = "icu_calendar" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b520c5675775e3838447c33fc55bf558148c6824ef0d20ff7a9e0df7345a281c" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar_data", + "icu_locid", + "icu_locid_transform", + "icu_provider", + "serde", + "tinystr", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_calendar_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75d8d1a514ca7e6dc547be930f2fd661d578909c07cf1c1adade81c3f7a78840" + +[[package]] +name = "icu_casemap" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "976068d7759293cbd9daa0d1669618bb9094c7ee54e546cd8b877dd4fe59007a" +dependencies = [ + "displaydoc", + "icu_casemap_data", + "icu_collections", + "icu_locid", + "icu_properties", + "icu_provider", + "serde", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_casemap_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1251070c14d5b94cd00f97025e9cedce6a6eeb39485e2a226c58432cc4f72ffd" + +[[package]] +name = "icu_collator" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be493c81154545a00fc5196e814cae0e1470bc696d518b5df877049aa6bcefe1" +dependencies = [ + "displaydoc", + "icu_collator_data", + "icu_collections", + "icu_locid", + "icu_locid_transform", + "icu_normalizer", + "icu_properties", + "icu_provider", + "serde", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_collator_data" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dbe9abe5ce570ad4707026f37bc21ef95c36b945c3c4564b9aa4e2e1c043126" + +[[package]] +name = "icu_collections" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3907b2246e8dd5a29ead8a965e7c0c8a90e9b928e614a4279257d45c5e553e91" +dependencies = [ + "displaydoc", + "serde", + "yoke", + "zerofrom", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_compactdecimal" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a8bb9143e7681fd5f5877c76f7b6365e173545d00d0e12ef23ba1888a996baa" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_compactdecimal_data", + "icu_decimal", + "icu_locid_transform", + "icu_plurals", + "icu_provider", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_compactdecimal_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2e9b7585f26db531ea5aaedaa68cb66cd2be37fe698b33a289849ff3129545b" + +[[package]] +name = "icu_datetime" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f5bf2e6dd961b59ee5935070220915db6cf0ab5137de362964f800c2b7d14fa" +dependencies = [ + "displaydoc", + "either", + "fixed_decimal", + "icu_calendar", + "icu_datetime_data", + "icu_decimal", + "icu_locid", + "icu_locid_transform", + "icu_plurals", + "icu_provider", + "icu_timezone", + "litemap 0.7.1", + "serde", + "smallvec", + "tinystr", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_datetime_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "078b2ed516a2f5054ee7f55b1fe970b92e90ae4cace8a0fe1e5f9fc2e94be609" + +[[package]] +name = "icu_decimal" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1986a0b7df834aaddb911b4593c990950ac5606fc83ce9aad4311be80f51e81a" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_decimal_data", + "icu_locid", + "icu_locid_transform", + "icu_provider", + "serde", + "writeable", +] + +[[package]] +name = "icu_decimal_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c064b3828953151f8c610bfff6fec776f958641249ebfd1cf36f073f0654e77" + +[[package]] +name = "icu_displaynames" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c98329d348e918ac7e88e6d6613a46bef09ca8a65db4ddf70d86e6eaac0e2ec3" +dependencies = [ + "icu_displaynames_data", + "icu_locid", + "icu_locid_transform", + "icu_provider", + "serde", + "tinystr", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_displaynames_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60f9f56c427f1e80383667e8fb13c07707f6561839283115617cc67307a5d020" + +[[package]] +name = "icu_list" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1a44bbed77a7e7b555f9d7dd4b43f75ec1402b438a901d20451943d50cbd90" +dependencies = [ + "deduplicating_array", + "displaydoc", + "icu_list_data", + "icu_locid_transform", + "icu_provider", + "regex-automata 0.2.0", + "serde", + "writeable", +] + +[[package]] +name = "icu_list_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3237583f0cb7feafabb567c4492fe9ef1d2d4113f6a8798a923273ea5de996d" + +[[package]] +name = "icu_locid" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f284eb342dc49d3e9d9f3b188489d76b5d22dfb1d1a5e0d1941811253bac625c" +dependencies = [ + "displaydoc", + "litemap 0.7.1", + "serde", + "tinystr", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_locid_transform" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6551daf80882d8e68eee186cc19e132d8bde1b1f059a79b93384a5ca0e8fc5e7" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "serde", + "tinystr", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a741eba5431f75eb2f1f9022d3cffabcadda6771e54fb4e77c8ba8653e4da44" + +[[package]] +name = "icu_normalizer" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "080fc33a720d50a7342b0c58df010fbcfb842d6f78ef81555f8b1ac6bba57d3c" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "serde", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d22f74066c2e6442db2a9aa14950278e86719e811e304e48bae03094b369d" + +[[package]] +name = "icu_plurals" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20556516b8be2b2f5dc3d6b23884b65c5c59ed8be0b44c419e4808c9b0792fce" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_locid", + "icu_locid_transform", + "icu_plurals_data", + "icu_provider", + "serde", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_plurals_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc552215224997aaaa4e05d95981386d3c52042acebfcc732137d5d9be96a21" + +[[package]] +name = "icu_properties" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3477ae70f8ca8dc08ff7574b5398ed0a2f2e4e6b66bdff2558a92ed67e262be1" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "serde", + "tinystr", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_properties_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c8bb3b67a8347e94d580434369e5c7ee89999b9309d04b7cfc88dfaa0f31b59" + +[[package]] +name = "icu_provider" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68acdef80034b5e35d8524e9817479d389a4f9774f3f0cbe1bf3884d80fd5934" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "postcard", + "serde", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_provider_blob" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31326d28c7f95a964a4f0ee86c24002da5f6db907e3bcb079949b4ff103b6a9" +dependencies = [ + "icu_provider", + "postcard", + "serde", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_provider_macros" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2060258edfcfe32ca7058849bf0f146cb5c59aadbedf480333c0d0002f97bc99" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", +] + +[[package]] +name = "icu_relativetime" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4e6c1b531ab35f5b0cb552d3fb8dab1cb49f98e68e12bdc2169ca15e805207c" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_decimal", + "icu_locid_transform", + "icu_plurals", + "icu_provider", + "icu_relativetime_data", + "serde", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_relativetime_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71ec2ca0aff8c6865075c6257bc91d21a77acb6465635306a280af89208bed24" + +[[package]] +name = "icu_segmenter" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcb3c1981ce2187a745f391a741cb14e77453325acb3b2e014b05da51c0a39f2" +dependencies = [ + "core_maths", + "displaydoc", + "icu_collections", + "icu_locid", + "icu_provider", + "icu_segmenter_data", + "serde", + "utf8_iter", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_segmenter_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9703f6713044d1c0a1335a6d78ffece4c9380582416ace6feeb608e84d279fc7" + +[[package]] +name = "icu_timezone" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e6401cd210ccda98b2e7fc707831b29c6efe319efbbec460f957b6f331f626" +dependencies = [ + "displaydoc", + "icu_calendar", + "icu_locid", + "icu_provider", + "icu_timezone_data", + "serde", + "tinystr", + "zerotrie", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_timezone_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7e214a653bac59b768c42f82d252f13af95e8a9cb07b6108b8bc723c561b43" + +[[package]] +name = "icu_transliterate" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4bdf006774b5a5898d97af6c95b148d34cd5c87cbed00610ff873e5b5885e28" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid", + "icu_normalizer", + "icu_properties", + "icu_provider", + "icu_unicodeset_parse", + "litemap 0.7.1", + "serde", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_unicodeset_parse" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2c3c1ab072cb9ec2dfb377ed7be07bf1bdce055b8324ba6392323f588c38c5a" +dependencies = [ + "icu_collections", + "icu_properties", + "icu_provider", + "tinystr", + "zerovec 0.10.0", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -2177,9 +2725,9 @@ dependencies = [ [[package]] name = "lindera-cc-cedict-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2e8f2ca97ddf952fe340642511b9c14b373cb2eef711d526bb8ef2ca0969b8" +checksum = "6f567a47e47b5420908424de2c6c5e424e3cafe588d0146bd128c0f3755758a3" dependencies = [ "anyhow", "bincode", @@ -2196,9 +2744,9 @@ dependencies = [ [[package]] name = "lindera-compress" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f72b460559bcbe8a9cee85ea4a5056133ed3abf373031191589236e656d65b59" +checksum = "49f3e553d55ebe9881fa5e5de588b0a153456e93564d17dfbef498912caf63a2" dependencies = [ "anyhow", "flate2", @@ -2207,9 +2755,9 @@ dependencies = [ [[package]] name = "lindera-core" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f586eb8a9393c32d5525e0e9336a3727bd1329674740097126f3b0bff8a1a1ea" +checksum = "a9a2440cc156a4a911a174ec68203543d1efb10df3a700a59b6bf581e453c726" dependencies = [ "anyhow", "bincode", @@ -2224,9 +2772,9 @@ dependencies = [ [[package]] name = "lindera-decompress" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb1facd8da698072fcc7338bd757730db53d59f313f44dd583fa03681dcc0e1" +checksum = "e077a410e61c962cb526f71b7effd62ffc607488a8f61869c937582d2ccb529b" dependencies = [ "anyhow", "flate2", @@ -2235,9 +2783,9 @@ dependencies = [ [[package]] name = "lindera-dictionary" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7be7410b1da7017a8948986b87af67082f605e9a716f0989790d795d677f0c" +checksum = "d9f57491adf7b311a3ee87f5e4a36454df16a2ec73de4ef28b2106fac80bd782" dependencies = [ "anyhow", "bincode", @@ -2255,9 +2803,9 @@ dependencies = [ [[package]] name = "lindera-ipadic-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "705d07f8a45d04fd95149f7ad41a26d1f9e56c9c00402be6f9dd05e3d88b99c6" +checksum = "a3476ec7748aebd2eb23d496ddfce5e7e0a5c031cffcd214451043e02d029f11" dependencies = [ "anyhow", "bincode", @@ -2276,9 +2824,9 @@ dependencies = [ [[package]] name = "lindera-ipadic-neologd-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633a93983ba13fba42328311a501091bd4a7aff0c94ae9eaa9d4733dd2b0468a" +checksum = "7b1c7576a02d5e4af2bf62de51790a01bc4b8bc0d0b6a6b86a46b157f5cb306d" dependencies = [ "anyhow", "bincode", @@ -2297,9 +2845,9 @@ dependencies = [ [[package]] name = "lindera-ko-dic" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a428e0d316b6c86f51bd919479692bc41ad840dba266ebc044663970f431ea18" +checksum = "b713ecd5b827d7d448c3c5eb3c6d5899ecaf22cd17087599996349a02c76828d" dependencies = [ "bincode", "byteorder", @@ -2314,9 +2862,9 @@ dependencies = [ [[package]] name = "lindera-ko-dic-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a5288704c6b8a069c0a1705c38758e836497698b50453373ab3d56c6f9a7ef8" +checksum = "3e545752f6487be87b572529ad594cb3b48d2ef20821516f598b2d152d23277b" dependencies = [ "anyhow", "bincode", @@ -2334,9 +2882,9 @@ dependencies = [ [[package]] name = "lindera-tokenizer" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "106ba439b2e87529d9bbedbb88d69f635baba1195c26502b308f55a85885fc81" +checksum = "24a2d4606a5a4da62ac4a3680ee884a75da7f0c892dc967fc9cb983ceba39a8f" dependencies = [ "bincode", "byteorder", @@ -2349,9 +2897,9 @@ dependencies = [ [[package]] name = "lindera-unidic" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3399b6dcfe1701333451d184ff3c677f433b320153427b146360c9e4bd8cb816" +checksum = "388b1bdf81794b5d5b8057ce0321c58ff4b90d676b637948ccc7863ae2f43d28" dependencies = [ "bincode", "byteorder", @@ -2366,9 +2914,9 @@ dependencies = [ [[package]] name = "lindera-unidic-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b698227fdaeac32289173ab389b990d4eb00a40cbc9912020f69a0c491dabf55" +checksum = "cdfa3e29a22c047da57fadd960ff674b720de15a1e2fb35b5ed67f3408afb469" dependencies = [ "anyhow", "bincode", @@ -2402,6 +2950,21 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +[[package]] +name = "litemap" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "575d8a551c59104b4df91269921e5eab561aa1b77c618dac0414b5d44a4617de" + +[[package]] +name = "litemap" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a1a2647d5b7134127971a6de0d533c49de2159167e7f259c427195f87168a1" +dependencies = [ + "serde", +] + [[package]] name = "lmdb-rkv-sys" version = "0.15.1" @@ -3143,6 +3706,17 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "postcard" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55c51ee6c0db07e68448e336cf8ea4131a620edefebf9893e759b2d793420f8" +dependencies = [ + "cobs", + "embedded-io", + "serde", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -3343,10 +3917,19 @@ checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" dependencies = [ "aho-corasick", "memchr", - "regex-automata", + "regex-automata 0.3.6", "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782" +dependencies = [ + "memchr", +] + [[package]] name = "regex-automata" version = "0.3.6" @@ -3767,6 +4350,9 @@ name = "smallvec" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +dependencies = [ + "serde", +] [[package]] name = "smartstring" @@ -3882,6 +4468,18 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "synstructure" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", + "unicode-xid", +] + [[package]] name = "sysinfo" version = "0.29.7" @@ -3987,6 +4585,17 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinystr" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5d0e245e80bdc9b4e5356fc45a72184abbc3861992603f515270e9340f5a219" +dependencies = [ + "displaydoc", + "serde", + "zerovec 0.10.0", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -4257,12 +4866,24 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52df8b7fb78e7910d776fccf2e42ceaf3604d55e8e7eb2dbd183cb1441d8a692" + [[package]] name = "utf8-width" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" +[[package]] +name = "utf8_iter" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a8922555b9500e3d865caed19330172cd67cbf82203f1a3311d8c305cc9f33" + [[package]] name = "utf8parse" version = "0.2.1" @@ -4647,6 +5268,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0af0c3d13faebf8dda0b5256fa7096a2d5ccb662f7b9f54a40fe201077ab1c2" + [[package]] name = "xattr" version = "1.0.1" @@ -4681,6 +5314,30 @@ dependencies = [ "url", ] +[[package]] +name = "yoke" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e38c508604d6bbbd292dadb3c02559aa7fff6b654a078a36217cad871636e4" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5e19fb6ed40002bab5403ffa37e53e0e56f914a4450c8765f533018db1db35f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", + "synstructure 0.13.0", +] + [[package]] name = "zerocopy" version = "0.3.0" @@ -4699,7 +5356,74 @@ checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" dependencies = [ "proc-macro2", "syn 1.0.109", - "synstructure", + "synstructure 0.12.6", +] + +[[package]] +name = "zerofrom" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", + "synstructure 0.13.0", +] + +[[package]] +name = "zerotrie" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9685bb4deb98dab812e87c296a9631fc00d7ca4bc5c2c5f304f375bbed711a8a" +dependencies = [ + "displaydoc", + "litemap 0.7.1", + "serde", + "yoke", + "zerofrom", + "zerovec 0.10.0", +] + +[[package]] +name = "zerovec" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "591691014119b87047ead4dcf3e6adfbf73cb7c38ab6980d4f18a32138f35d46" +dependencies = [ + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1194130c5b155bf8ae50ab16c86ab758cd695cf9ad176d2f870b744cbdbb572e" +dependencies = [ + "serde", + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acabf549809064225ff8878baedc4ce3732ac3b07e7c7ce6e5c2ccdbc485c324" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", ] [[package]] diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 147810ef4..639596fa6 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"] japanese = ["milli/japanese"] # thai specialized tokenization thai = ["milli/thai"] - # allow greek specialized tokenization greek = ["milli/greek"] +# allow khmer specialized tokenization +khmer = ["milli/khmer"] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 35a4a4304..e14116645 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -150,6 +150,7 @@ hebrew = ["meilisearch-types/hebrew"] japanese = ["meilisearch-types/japanese"] thai = ["meilisearch-types/thai"] greek = ["meilisearch-types/greek"] +khmer = ["meilisearch-types/khmer"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index b19b40e85..cf5fe9726 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -17,7 +17,7 @@ bincode = "1.3.3" bstr = "1.4.0" bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] } byteorder = "1.4.3" -charabia = { version = "0.8.3", default-features = false } +charabia = { version = "0.8.5", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.8" deserr = { version = "0.6.0", features = ["actix-web"]} @@ -82,7 +82,7 @@ md5 = "0.7.0" rand = { version = "0.8.5", features = ["small_rng"] } [features] -all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"] +all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml @@ -106,3 +106,6 @@ thai = ["charabia/thai"] # allow greek specialized tokenization greek = ["charabia/greek"] + +# allow khmer specialized tokenization +khmer = ["charabia/khmer"]