From ad52c950ba614f0c7da0f0f4116779bb373c3715 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 9 Oct 2024 11:35:45 +0200 Subject: [PATCH] Only run word pair proximity docids extraction if proximity_precision enables it --- Cargo.lock | 1470 ++++++++--------- index-scheduler/src/batch.rs | 8 +- meili-snap/Cargo.toml | 2 +- milli/Cargo.toml | 4 +- .../index_documents/helpers/grenad_helpers.rs | 2 + milli/src/update/new/channel.rs | 300 +--- milli/src/update/new/extract/cache.rs | 784 ++++++--- milli/src/update/new/extract/documents.rs | 73 + .../new/extract/faceted/extract_facets.rs | 118 +- milli/src/update/new/extract/lru.rs | 234 --- milli/src/update/new/extract/mod.rs | 16 +- .../extract/searchable/extract_word_docids.rs | 319 ++-- .../extract_word_pair_proximity_docids.rs | 22 +- .../src/update/new/extract/searchable/mod.rs | 91 +- .../extract/searchable/tokenize_document.rs | 1 - milli/src/update/new/facet_search_builder.rs | 49 +- milli/src/update/new/indexer/de.rs | 2 +- .../update/new/indexer/document_changes.rs | 25 +- .../update/new/indexer/document_deletion.rs | 15 +- milli/src/update/new/indexer/mod.rs | 406 ++--- milli/src/update/new/indexer/partial_dump.rs | 4 +- .../update/new/indexer/update_by_function.rs | 6 +- milli/src/update/new/merger.rs | 375 ++--- milli/src/update/new/mod.rs | 3 + milli/src/update/new/parallel_iterator_ext.rs | 43 +- milli/src/update/new/words_prefix_docids.rs | 109 +- 26 files changed, 1977 insertions(+), 2504 deletions(-) create mode 100644 milli/src/update/new/extract/documents.rs delete mode 100644 milli/src/update/new/extract/lru.rs diff --git a/Cargo.lock b/Cargo.lock index 5cd1f3976..633fdca8f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,11 +4,11 @@ version = 3 [[package]] name = "actix-codec" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "617a8268e3537fe1d8c9ead925fca49ef6400927ee7bc26750e90ecee14ce4b8" +checksum = 
"5f7b0a21988c1bf877cf4759ef5ddaac04c1c9fe808c9142ecb78ba97d97a28a" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "bytes", "futures-core", "futures-sink", @@ -36,9 +36,9 @@ dependencies = [ [[package]] name = "actix-http" -version = "3.8.0" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae682f693a9cd7b058f2b0b5d9a6d7728a8555779bedbbc35dd88528611d020" +checksum = "d48f96fc3003717aeb9856ca3d02a8c7de502667ad76eeacd830b48d2e91fac4" dependencies = [ "actix-codec", "actix-rt", @@ -56,7 +56,7 @@ dependencies = [ "flate2", "futures-core", "h2 0.3.26", - "http 0.2.11", + "http 0.2.12", "httparse", "httpdate", "itoa", @@ -80,7 +80,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -91,7 +91,7 @@ checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8" dependencies = [ "bytestring", "cfg-if", - "http 0.2.11", + "http 0.2.12", "regex-lite", "serde", "tracing", @@ -110,9 +110,9 @@ dependencies = [ [[package]] name = "actix-server" -version = "2.2.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e8613a75dd50cc45f473cee3c34d59ed677c0f7b44480ce3b8247d7dc519327" +checksum = "7ca2549781d8dd6d75c40cf6b6051260a2cc2f3c62343d761a969a0640646894" dependencies = [ "actix-rt", "actix-service", @@ -120,8 +120,7 @@ dependencies = [ "futures-core", "futures-util", "mio", - "num_cpus", - "socket2 0.4.9", + "socket2", "tokio", "tracing", ] @@ -168,9 +167,9 @@ dependencies = [ [[package]] name = "actix-web" -version = "4.8.0" +version = "4.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1988c02af8d2b718c05bc4aeb6a66395b7cdf32858c2c71131e5637a8c05a9ff" +checksum = "9180d76e5cc7ccbc4d60a506f2c727730b154010262df5b910eb17dbe4b8cb38" dependencies = [ "actix-codec", 
"actix-http", @@ -191,6 +190,7 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", + "impl-more", "itoa", "language-tags", "log", @@ -202,7 +202,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "smallvec", - "socket2 0.5.5", + "socket2", "time", "url", ] @@ -216,23 +216,23 @@ dependencies = [ "actix-router", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "addr2line" -version = "0.20.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "aes" @@ -308,57 +308,58 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.13" +version = "0.6.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", + "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" -version = "1.0.6" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56" [[package]] name = "anstyle-parse" -version = "0.2.1" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.0" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.1" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" dependencies = [ "backtrace", ] @@ -380,9 +381,9 @@ dependencies = [ [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arroy" @@ -415,13 +416,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.81" +version = "0.1.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + 
"syn 2.0.85", ] [[package]] @@ -432,23 +433,23 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backtrace" -version = "0.3.68" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] @@ -514,22 +515,20 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.69.4" +version = "0.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" dependencies = [ "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", + "itertools 0.13.0", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -614,7 +613,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", "syn_derive", ] @@ -641,9 +640,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" dependencies = [ "memchr", "regex-automata", @@ -704,28 +703,28 @@ dependencies = [ [[package]] name = 
"bytecount" -version = "0.6.3" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" [[package]] name = "bytemuck" -version = "1.16.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.6.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" +checksum = "bcfcc3cd946cb52f0bbfdbbcfa2f4e24f75ebb6c0e1002f7c25904fada18b9ec" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -736,15 +735,15 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" [[package]] name = "bytestring" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "238e4886760d98c4f899360c834fa93e62cf7f721ac3c2da375cbdf4b8679aae" +checksum = "74d80203ea6b29df88012294f62733de21cfeab47f17b41af3a38bc30a03ee72" dependencies = [ "bytes", ] @@ -772,9 +771,9 @@ dependencies = [ [[package]] name = "camino" -version = "1.1.6" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +checksum = 
"8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" dependencies = [ "serde", ] @@ -789,7 +788,7 @@ dependencies = [ "candle-kernels", "cudarc", "gemm", - "half 2.4.0", + "half", "memmap2", "num-traits", "num_cpus", @@ -818,7 +817,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b006b30f66a0d94fc9cef0ac4de6ce510565f35ae2c6c35ce5d4aacfb0fc8eeb" dependencies = [ "candle-core", - "half 2.4.0", + "half", "num-traits", "rayon", "safetensors", @@ -847,9 +846,9 @@ dependencies = [ [[package]] name = "cargo-platform" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ceed8ef69d8518a5dda55c07425450b58a4e1946f4951eab6d7191ee86c2443d" +checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" dependencies = [ "serde", ] @@ -870,9 +869,9 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.20.3" +version = "0.20.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4895c018bb228aa6b3ba1a0285543fcb4b704734c3fb1f72afaa75aa769500c1" +checksum = "88da5a13c620b4ca0078845707ea9c3faf11edbc3ffd8497d11d686211cd1ac0" dependencies = [ "serde", "toml", @@ -886,13 +885,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.104" +version = "1.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490" +checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" dependencies = [ "jobserver", "libc", - "once_cell", + "shlex", ] [[package]] @@ -958,9 +957,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +checksum = 
"42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", @@ -969,18 +968,18 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", - "half 1.8.2", + "half", ] [[package]] @@ -995,9 +994,9 @@ dependencies = [ [[package]] name = "clang-sys" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", "libc", @@ -1006,9 +1005,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.9" +version = "4.5.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" +checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" dependencies = [ "clap_builder", "clap_derive", @@ -1016,9 +1015,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.9" +version = "4.5.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" +checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" dependencies = [ "anstream", "anstyle", @@ -1028,21 +1027,21 @@ dependencies = [ [[package]] name = "clap_derive" 
-version = "4.5.8" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "color-spantrace" @@ -1058,9 +1057,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "concat-arrays" @@ -1075,15 +1074,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.7" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", "lazy_static", "libc", "unicode-width", - "windows-sys 0.45.0", + "windows-sys 0.52.0", ] [[package]] @@ -1108,9 +1107,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "convert_case" @@ -1140,15 +1139,15 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = 
"0.8.4" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ "libc", ] @@ -1308,11 +1307,11 @@ dependencies = [ [[package]] name = "cudarc" -version = "0.11.7" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ee2a3fbbd981e1c7ea73cc2af136e754eb22d17436de37155227ee4dbe0cf4" +checksum = "7a5bd4d1eee570c3b2ac64ed114125517dd1e541d88dd28fc259f1de4dba8d60" dependencies = [ - "half 2.4.0", + "half", "libloading", ] @@ -1328,12 +1327,12 @@ dependencies = [ [[package]] name = "darling" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" dependencies = [ - "darling_core 0.20.9", - "darling_macro 0.20.9", + "darling_core 0.20.10", + "darling_macro 0.20.10", ] [[package]] @@ -1352,16 +1351,16 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -1377,13 +1376,13 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.9" 
+version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ - "darling_core 0.20.9", + "darling_core 0.20.10", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -1415,9 +1414,9 @@ dependencies = [ [[package]] name = "deflate64" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83ace6c86376be0b6cdcf3fb41882e81d94b31587573d1cfa9d01cd06bba210d" +checksum = "da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b" [[package]] name = "deranged" @@ -1437,7 +1436,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -1451,11 +1450,11 @@ dependencies = [ [[package]] name = "derive_builder" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" dependencies = [ - "derive_builder_macro 0.20.0", + "derive_builder_macro 0.20.2", ] [[package]] @@ -1472,14 +1471,14 @@ dependencies = [ [[package]] name = "derive_builder_core" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ - "darling 0.20.9", + "darling 0.20.10", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -1494,25 +1493,25 @@ dependencies = [ [[package]] name = "derive_builder_macro" -version = "0.20.0" +version = "0.20.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ - "derive_builder_core 0.20.0", - "syn 2.0.60", + "derive_builder_core 0.20.2", + "syn 2.0.85", ] [[package]] name = "derive_more" -version = "0.99.17" +version = "0.99.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" dependencies = [ "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version", - "syn 1.0.109", + "syn 2.0.85", ] [[package]] @@ -1541,7 +1540,7 @@ dependencies = [ "convert_case 0.6.0", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -1605,7 +1604,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -1738,9 +1737,9 @@ checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" [[package]] name = "encoding_rs" -version = "0.8.33" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] @@ -1756,14 +1755,14 @@ dependencies = [ [[package]] name = "enum-as-inner" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" dependencies = [ - "heck 0.4.1", + "heck", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -1783,7 +1782,7 @@ checksum = 
"a1ab991c1362ac86c61ab6f556cff143daa22e5a15e4e189df818b2fd19fe65b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -1794,9 +1793,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", "windows-sys 0.52.0", @@ -1831,9 +1830,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "file-store" @@ -1847,14 +1846,14 @@ dependencies = [ [[package]] name = "filetime" -version = "0.2.22" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0" +checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.3.5", - "windows-sys 0.48.0", + "libredox", + "windows-sys 0.59.0", ] [[package]] @@ -1869,9 +1868,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", "miniz_oxide", @@ -1920,9 +1919,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.30" +version = "0.3.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", @@ -1935,9 +1934,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -1945,15 +1944,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -1962,38 +1961,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-macro" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -2101,7 +2100,7 @@ checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8" dependencies = [ "bytemuck", "dyn-stack", - "half 2.4.0", + "half", "num-complex", "num-traits", "once_cell", @@ -2122,7 +2121,7 @@ dependencies = [ "dyn-stack", "gemm-common", "gemm-f32", - "half 2.4.0", + "half", "num-complex", "num-traits", "paste", @@ -2179,9 +2178,9 @@ checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad" [[package]] name = "getrandom" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "js-sys", @@ -2190,23 +2189,11 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "getset" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" -dependencies = [ - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "gimli" -version = "0.27.3" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "git2" @@ -2250,7 +2237,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http 0.2.11", + "http 0.2.12", "indexmap", "slab", "tokio", @@ -2260,9 +2247,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -2279,15 +2266,9 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "half" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "bytemuck", "cfg-if", @@ -2347,12 +2328,6 @@ dependencies = [ "stable_deref_trait", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -2361,9 +2336,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "heed" -version = "0.20.3" 
+version = "0.20.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bc30da4a93ff8cb98e535d595d6de42731d4719d707bc1c86f579158751a24e" +checksum = "7d4f449bab7320c56003d37732a917e18798e2f1709d80263face2b4f9436ddb" dependencies = [ "bitflags 2.6.0", "byteorder", @@ -2402,6 +2377,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "hex" version = "0.4.3" @@ -2435,9 +2416,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" dependencies = [ "bytes", "fnv", @@ -2457,9 +2438,9 @@ dependencies = [ [[package]] name = "http-body" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http 1.1.0", @@ -2480,26 +2461,26 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" [[package]] name = "httpdate" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" 
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.5", + "h2 0.4.6", "http 1.1.0", "http-body", "httparse", @@ -2513,9 +2494,9 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.2" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", "http 1.1.0", @@ -2531,9 +2512,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.6" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", @@ -2542,9 +2523,8 @@ dependencies = [ "http-body", "hyper", "pin-project-lite", - "socket2 0.5.5", + "socket2", "tokio", - "tower", "tower-service", "tracing", ] @@ -2567,9 +2547,9 @@ dependencies = [ [[package]] name = "impl-more" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d" +checksum = "aae21c3177a27788957044151cc2800043d127acaa460a47ebb9b84dfa2c6aa0" [[package]] name = "index-scheduler" @@ -2582,7 +2562,7 @@ dependencies = [ "bumpalo", "crossbeam", "csv", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "dump", "enum-iterator", "file-store", @@ -2609,20 +2589,20 @@ dependencies = [ 
[[package]] name = "indexmap" -version = "2.2.6" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown 0.15.0", "serde", ] [[package]] name = "indicatif" -version = "0.17.7" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb28741c9db9a713d93deb3bb9515c20788cef5815265bee4980e87bde7e0f25" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" dependencies = [ "console", "instant", @@ -2657,18 +2637,18 @@ dependencies = [ [[package]] name = "instant" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ "cfg-if", ] [[package]] name = "ipnet" -version = "2.8.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" [[package]] name = "irg-kvariants" @@ -2682,15 +2662,21 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" dependencies = [ - "hermit-abi", + "hermit-abi 0.4.0", "libc", "windows-sys 0.52.0", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.10.5" @@ -2740,7 +2726,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1e2b0210dc78b49337af9e49d7ae41a39dceac6e5985613f1cf7763e2f76a25" dependencies = [ "cedarwood", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "fxhash", "lazy_static", "phf", @@ -2750,18 +2736,18 @@ dependencies = [ [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -2800,9 +2786,9 @@ dependencies = [ [[package]] name = "kstring" -version = "2.0.0" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3066350882a1cd6d950d055997f379ac37fd39f81cd4d8ed186032eb3c5747" +checksum = "558bf9508a558512042d3095138b1f7b8fe90c5467d94f9f1da28b3731c5dbd1" dependencies = [ "serde", "static_assertions", @@ -2820,12 +2806,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "levenshtein_automata" version = "0.2.1" @@ -2837,9 +2817,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.155" 
+version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libgit2-sys" @@ -2855,19 +2835,19 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.52.4", + "windows-targets 0.48.5", ] [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libmimalloc-sys" @@ -2881,9 +2861,9 @@ dependencies = [ [[package]] name = "libproc" -version = "0.14.8" +version = "0.14.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae9ea4b75e1a81675429dafe43441df1caea70081e82246a8cccf514884a88bb" +checksum = "e78a09b56be5adbcad5aa1197371688dc6bb249a26da3bca2011ee2fb987ebfb" dependencies = [ "bindgen", "errno", @@ -2891,10 +2871,21 @@ dependencies = [ ] [[package]] -name = "libz-sys" -version = "1.1.15" +name = "libredox" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.6.0", + "libc", + "redox_syscall", +] + +[[package]] +name = "libz-sys" +version = "1.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d2d16453e800a8cf6dd2fc3eb4bc99b786a9b90c663b8559a5b1a041bf89e472" dependencies = [ "cc", "libc", @@ -3058,7 +3049,7 @@ dependencies = [ "bincode", "byteorder", "csv", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "encoding", "encoding_rs", "encoding_rs_io", @@ -3223,15 +3214,15 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linux-raw-sys" -version = "0.4.12" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "liquid" -version = "0.26.6" +version = "0.26.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10929f201279ba14da3297b957dcda1e0bf7a6f3bb5115688be684aa8864e9cc" +checksum = "7cdcc72b82748f47c2933c172313f5a9aea5b2c4eb3fa4c66b4ea55bb60bb4b1" dependencies = [ "doc-comment", "liquid-core", @@ -3242,12 +3233,12 @@ dependencies = [ [[package]] name = "liquid-core" -version = "0.26.6" +version = "0.26.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3aef4b2160791f456eb880c990a97746f693746f92302ef5f1d06111cf14b768" +checksum = "2752e978ffc53670f3f2e8b3ef09f348d6f7b5474a3be3f8a5befe5382e4effb" dependencies = [ "anymap2", - "itertools 0.12.1", + "itertools 0.13.0", "kstring", "liquid-derive", "num-traits", @@ -3260,22 +3251,22 @@ dependencies = [ [[package]] name = "liquid-derive" -version = "0.26.5" +version = "0.26.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915f6d0a2963a27cd5205c1902f32ddfe3bc035816afd268cf88c0fc0f8d287e" +checksum = "3b51f1d220e3fa869e24cfd75915efe3164bd09bb11b3165db3f37f57bf673e3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "liquid-lib" -version = "0.26.6" +version = "0.26.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f48fc446873f74d869582f5c4b8cbf3248c93395e410a67af5809b3731e44a" +checksum = "59b1a298d3d2287ee5b1e43840d885b8fdfc37d3f4e90d82aacfd04d021618da" dependencies = [ - "itertools 0.12.1", + "itertools 0.13.0", "liquid-core", "once_cell", "percent-encoding", @@ -3286,9 +3277,9 @@ dependencies = [ [[package]] name = "lmdb-master-sys" -version = "0.2.2" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57640c190703d5ccf4a86aff4aeb749b2d287a8cb1723c76b51f39d77ab53b24" +checksum = "472c3760e2a8d0f61f322fb36788021bb36d573c502b50fa3e2bcaac3ec326c9" dependencies = [ "cc", "doxygen-rs", @@ -3297,27 +3288,26 @@ dependencies = [ [[package]] name = "local-channel" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f303ec0e94c6c54447f84f3b0ef7af769858a9c4ef56ef2a986d3dcd4c3fc9c" +checksum = "b6cbc85e69b8df4b8bb8b89ec634e7189099cea8927a276b7384ce5488e53ec8" dependencies = [ "futures-core", "futures-sink", - "futures-util", "local-waker", ] [[package]] name = "local-waker" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e34f76eb3611940e0e7d53a9aaa4e6a3151f69541a282fd0dad5571420c53ff1" +checksum = "4d873d7c67ce09b42110d801813efbc9364414e356be9935700d368351657487" [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ "autocfg", "scopeguard", @@ -3331,9 +3321,9 @@ checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e" [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lzma-rs" @@ -3370,7 +3360,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -3480,7 +3470,7 @@ dependencies = [ "uuid", "wiremock", "yaup", - "zip 2.1.3", + "zip 2.2.0", ] [[package]] @@ -3555,9 +3545,9 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" dependencies = [ "libc", "stable_deref_trait", @@ -3567,6 +3557,7 @@ dependencies = [ name = "milli" version = "1.11.0" dependencies = [ + "allocator-api2", "arroy", "big_s", "bimap", @@ -3590,7 +3581,7 @@ dependencies = [ "fxhash", "geoutils", "grenad", - "hashbrown 0.14.5", + "hashbrown 0.15.0", "heed", "hf-hub", "indexmap", @@ -3615,6 +3606,7 @@ dependencies = [ "rhai", "roaring", "rstar", + "rustc-hash 2.0.0", "serde", "serde_json", "slice-group-by", @@ -3650,9 +3642,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mime_guess" -version = "2.0.4" +version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" dependencies = [ "mime", "unicase", @@ -3666,30 +3658,31 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.2" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "adler", + "adler2", ] [[package]] name = "mio" -version = "0.8.11" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ + "hermit-abi 0.3.9", "libc", "log", "wasi", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "monostate" -version = "0.1.9" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f370ae88093ec6b11a710dec51321a61d420fafd1bad6e30d01bd9c920e8ee" +checksum = "0d208407d7552cd041d8cdb69a1bc3303e029c598738177a3d87082004dc0e1e" dependencies = [ "monostate-impl", "serde", @@ -3697,20 +3690,20 @@ dependencies = [ [[package]] name = "monostate-impl" -version = "0.1.9" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371717c0a5543d6a800cac822eac735aa7d2d2fbb41002e9856a4089532dbdce" +checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "mutually_exclusive_features" -version = "0.0.3" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" +checksum = "e94e1e6445d314f972ff7395df2de295fe51b71821694f0b0e1e79c4f12c8577" [[package]] name = "nohash" @@ -3760,20 +3753,19 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +checksum = 
"a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ - "autocfg", "num-integer", "num-traits", ] [[package]] name = "num-complex" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ "bytemuck", "num-traits", @@ -3787,19 +3779,18 @@ checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", "libm", @@ -3811,29 +3802,29 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", ] [[package]] name = "num_enum" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02339744ee7253741199f897151b38e72257d13802d4ee837285cc2990a90845" +checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179" dependencies = [ "num_enum_derive", ] [[package]] name = "num_enum_derive" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"681030a937600a36906c185595136d26abfebb4aa9c65701cefcaf8578bb982b" +checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -3853,9 +3844,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" -version = "0.31.1" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" dependencies = [ "memchr", ] @@ -3867,9 +3858,9 @@ source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#ce53587 [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "onig" @@ -3895,9 +3886,9 @@ dependencies = [ [[package]] name = "oorandom" -version = "11.1.3" +version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "option-ext" @@ -3907,9 +3898,9 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "ordered-float" -version = "4.2.1" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ff2cf528c6c03d9ed653d6c4ce1dc0582dc4af309790ad92f07c1cd551b0be" +checksum = "83e7ccb95e240b7c9506a3d544f10d935e142cc90b0a1d56954fb44d89ad6b97" dependencies = [ "num-traits", ] @@ -3948,22 +3939,22 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.8" +version = 
"0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.3.5", + "redox_syscall", "smallvec", - "windows-targets 0.48.1", + "windows-targets 0.52.6", ] [[package]] name = "paste" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "path-matchers" @@ -3992,11 +3983,11 @@ dependencies = [ [[package]] name = "pem" -version = "3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" +checksum = "8e459365e590736a54c3fa561947c84837534b8e9af6fc5bf781307e82658fae" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "serde", ] @@ -4016,19 +4007,20 @@ dependencies = [ [[package]] name = "pest" -version = "2.7.2" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1acb4a4365a13f749a93f1a094a7805e5cfa0955373a9de860d962eaa3a5fe5a" +checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" dependencies = [ + "memchr", "thiserror", "ucd-trie", ] [[package]] name = "pest_derive" -version = "2.7.2" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "666d00490d4ac815001da55838c500eafb0320019bbaa44444137c48b443a853" +checksum = "d214365f632b123a47fd913301e14c946c61d1c183ee245fa76eb752e59a02dd" dependencies = [ "pest", "pest_generator", @@ -4036,22 +4028,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.2" +version = "2.7.14" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "68ca01446f50dbda87c1786af8770d535423fa8a53aec03b8f4e3d7eb10e0929" +checksum = "eb55586734301717aea2ac313f50b2eb8f60d2fc3dc01d190eefa2e625f60c4e" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "pest_meta" -version = "2.7.2" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56af0a30af74d0445c0bf6d9d051c979b516a1a5af790d251daee76005420a48" +checksum = "b75da2a70cf4d9cb76833c990ac9cd3923c9a8905a8929789ce347c84564d03d" dependencies = [ "once_cell", "pest", @@ -4098,7 +4090,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -4112,29 +4104,29 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.4" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.4" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -4150,9 +4142,9 @@ checksum = "16f2611cd06a1ac239a0cea4521de9eb068a6ca110324ee00631aa68daa74fc0" [[package]] name = "pkg-config" -version = 
"0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "platform-dirs" @@ -4165,9 +4157,9 @@ dependencies = [ [[package]] name = "plotters" -version = "0.3.5" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", @@ -4178,24 +4170,24 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.5" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" -version = "0.3.5" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "portable-atomic" -version = "1.5.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bccab0e7fd7cc19f820a1c8c91720af652d0c88dc9664dd72aef2614f04af3b" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" [[package]] name = "powerfmt" @@ -4205,17 +4197,20 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = 
"77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "proc-macro-crate" -version = "3.1.0" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" dependencies = [ - "toml_edit 0.21.0", + "toml_edit", ] [[package]] @@ -4227,7 +4222,6 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn 1.0.109", "version_check", ] @@ -4244,9 +4238,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.81" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] @@ -4319,9 +4313,9 @@ dependencies = [ [[package]] name = "pulp" -version = "0.18.9" +version = "0.18.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03457ac216146f43f921500bac4e892d5cd32b0479b929cbfc90f95cd6c599c2" +checksum = "a0a01a0dc67cf4558d279f0c25b0962bd08fc6dec0137699eae304103e882fe6" dependencies = [ "bytemuck", "libm", @@ -4331,16 +4325,17 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.2" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4ceeeeabace7857413798eb1ffa1e9c905a9946a57d81fb69b4b71c4d8eb3ad" +checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" dependencies = [ "bytes", "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.0.0", "rustls", + "socket2", "thiserror", "tokio", "tracing", @@ -4348,14 +4343,14 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.3" +version = "0.11.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe" +checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" dependencies = [ "bytes", "rand", "ring", - "rustc-hash", + "rustc-hash 2.0.0", "rustls", "slab", "thiserror", @@ -4365,22 +4360,23 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.2" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9096629c45860fc7fb143e125eb826b5e721e10be3263160c7d60ca832cf8c46" +checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" dependencies = [ + "cfg_aliases", "libc", "once_cell", - "socket2 0.5.5", + "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -4434,7 +4430,7 @@ dependencies = [ [[package]] name = "raw-collections" version = "0.1.0" -source = "git+https://github.com/dureuill/raw-collections.git#0ecd143c1707d237e3c4d749bc685418da2fccc2" +source = "git+https://github.com/dureuill/raw-collections.git#4ab9619207632c20f4e0c2e126d9d909cc58ef65" dependencies = [ "allocator-api2", "bumpalo", @@ -4500,38 +4496,29 @@ checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" [[package]] name = "redox_syscall" -version = "0.2.16" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", ] [[package]] name = "redox_users" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom", - "redox_syscall 0.2.16", + "libredox", "thiserror", ] [[package]] name = "regex" -version = "1.10.5" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -4541,9 +4528,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", @@ -4558,9 +4545,9 @@ checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rend" @@ -4573,9 +4560,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = 
"a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -4613,7 +4600,7 @@ dependencies = [ "wasm-streams", "web-sys", "webpki-roots", - "winreg", + "windows-registry", ] [[package]] @@ -4640,7 +4627,7 @@ source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -4660,9 +4647,9 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.44" +version = "0.7.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0" +checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" dependencies = [ "bitvec", "bytecheck", @@ -4678,9 +4665,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.7.44" +version = "0.7.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" dependencies = [ "proc-macro2", "quote", @@ -4711,9 +4698,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.35.0" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1790d1c4c0ca81211399e0e0af16333276f375209e71a37b67698a373db5b47a" +checksum = "b082d80e3e3cc52b2ed634388d436fe1f4de6af5786cc2de9ba9737527bdf555" dependencies = [ "arrayvec", "borsh", @@ -4727,9 +4714,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" @@ -4738,19 +4725,25 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] -name = "rustc_version" -version = "0.4.0" +name = "rustc-hash" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] [[package]] name = "rustix" -version = "0.38.31" +version = "0.38.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" dependencies = [ "bitflags 2.6.0", "errno", @@ -4761,9 +4754,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.11" +version = "0.23.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" +checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" dependencies = [ "log", "once_cell", @@ -4776,25 +4769,24 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" dependencies = [ - "base64 0.22.1", "rustls-pki-types", ] [[package]] name = "rustls-pki-types" -version = "1.7.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustls-webpki" -version = "0.102.5" +version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a6fccd794a42c2c105b513a2f62bc3fd8f3ba57a4593677ceb0bd035164d78" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ "ring", "rustls-pki-types", @@ -4803,21 +4795,21 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "ryu" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "safetensors" -version = "0.4.2" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d980e6bfb34436fb0a81e42bc41af43f11805bbbca443e7f68e9faaabe669ed" +checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6" dependencies = [ "serde", "serde_json", @@ -4860,9 +4852,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.18" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" dependencies = [ "serde", ] @@ -4875,9 +4867,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.210" +version = "1.0.214" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" dependencies = [ "serde_derive", ] @@ -4893,20 +4885,20 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "indexmap", "itoa", @@ -4926,9 +4918,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" dependencies = [ "serde", ] @@ -4995,9 +4987,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" dependencies = [ "libc", ] @@ -5010,15 +5002,15 @@ checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" [[package]] name = "simdutf8" -version = "0.1.4" +version = "0.1.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "similar" -version = "2.2.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" [[package]] name = "simple_asn1" @@ -5046,9 +5038,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "slab" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ "autocfg", ] @@ -5092,22 +5084,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.4.9" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" dependencies = [ "libc", - "winapi", -] - -[[package]] -name = "socket2" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" -dependencies = [ - "libc", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -5176,31 +5158,31 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "strum" -version = "0.26.2" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" +checksum = 
"8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ "strum_macros", ] [[package]] name = "strum_macros" -version = "0.26.2" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck 0.4.1", + "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "subtle" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" @@ -5215,9 +5197,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.60" +version = "2.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" +checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" dependencies = [ "proc-macro2", "quote", @@ -5233,7 +5215,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -5241,6 +5223,9 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "synchronoise" @@ -5259,7 +5244,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -5299,9 +5284,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tar" -version = "0.4.41" +version = "0.4.42" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" +checksum = "4ff6c40d3aedb5e06b57c6f669ad17ab063dd1e63d977c6a88e7f4dfa4f04020" dependencies = [ "filetime", "libc", @@ -5319,14 +5304,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.10.1" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" dependencies = [ "cfg-if", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -5349,22 +5335,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -5389,7 +5375,7 @@ dependencies = [ "fancy-regex 0.12.0", "lazy_static", "parking_lot", - "rustc-hash", + "rustc-hash 1.1.0", ] [[package]] @@ -5446,9 +5432,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.6.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ "tinyvec_macros", ] @@ -5491,32 +5477,31 
@@ dependencies = [ [[package]] name = "tokio" -version = "1.38.0" +version = "1.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.5", + "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -5532,9 +5517,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -5545,75 +5530,43 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.14" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.22.15", + "toml_edit", ] [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = 
"0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.21.0" +version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" -dependencies = [ - "indexmap", - "toml_datetime", - "winnow 0.5.40", -] - -[[package]] -name = "toml_edit" -version = "0.22.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59a3a72298453f564e2b111fa896f8d07fabb36f51f06d7e875fc5e0b5a3ef1" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ "indexmap", "serde", "serde_spanned", "toml_datetime", - "winnow 0.6.13", + "winnow", ] -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "pin-project", - "pin-project-lite", - "tokio", - "tower-layer", - "tower-service", -] - -[[package]] -name = "tower-layer" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" - [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -5629,9 +5582,9 @@ dependencies = [ [[package]] name = "tracing-actix-web" -version = "0.7.11" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee9e39a66d9b615644893ffc1704d2a89b5b315b7fd0228ad3182ca9a306b19" +checksum = "6b87073920bcce23e9f5cb0d2671e9f01d6803bb5229c159b2f5ce6806d73ffc" dependencies = [ 
"actix-web", "mutually_exclusive_features", @@ -5648,7 +5601,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -5727,9 +5680,9 @@ dependencies = [ [[package]] name = "try-lock" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typenum" @@ -5739,9 +5692,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "ucd-trie" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" [[package]] name = "unescaper" @@ -5754,18 +5707,15 @@ dependencies = [ [[package]] name = "unicase" -version = "2.6.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" -dependencies = [ - "version_check", -] +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" [[package]] name = "unicode-bidi" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" +checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" [[package]] name = "unicode-blocks" @@ -5775,15 +5725,15 @@ checksum = "6b12e05d9e06373163a9bb6bb8c263c261b396643a99445fe6b9811fd376581b" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-normalization" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" dependencies = [ "tinyvec", ] @@ -5799,15 +5749,15 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.11" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode_categories" @@ -5823,9 +5773,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "2.10.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72139d247e5f97a3eff96229a7ae85ead5328a39efe76f8bf5a06313d505b6ea" +checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a" dependencies = [ "base64 0.22.1", "flate2", @@ -5860,21 +5810,21 @@ checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" [[package]] name = "utf8-width" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" 
[[package]] name = "utf8parse" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "getrandom", "serde", @@ -5894,24 +5844,24 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vergen" -version = "9.0.0" +version = "9.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c32e7318e93a9ac53693b6caccfb05ff22e04a44c7cf8a279051f24c09da286f" +checksum = "349ed9e45296a581f455bc18039878f409992999bc1d5da12a6800eb18c8752f" dependencies = [ "anyhow", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "rustversion", "vergen-lib", ] [[package]] name = "vergen-git2" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62c52cd2b2b8b7ec75fc20111b3022ac3ff83e4fc14b9497cfcfd39c54f9c67" +checksum = "e771aff771c0d7c2f42e434e2766d304d917e29b40f0424e8faaaa936bbc3f29" dependencies = [ "anyhow", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "git2", "rustversion", "time", @@ -5921,21 +5871,20 @@ dependencies = [ [[package]] name = "vergen-lib" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e06bee42361e43b60f363bad49d63798d0f42fb1768091812270eca00c784720" +checksum = "229eaddb0050920816cf051e619affaf18caa3dd512de8de5839ccbc8e53abb0" dependencies = [ "anyhow", - "derive_builder 0.20.0", - "getset", + "derive_builder 0.20.2", "rustversion", ] [[package]] name = 
"version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "walkdir" @@ -5975,34 +5924,35 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.37" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if", "js-sys", @@ -6012,9 +5962,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6022,28 +5972,28 @@ dependencies = [ [[package]] 
name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -6054,9 +6004,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.64" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", @@ -6064,9 +6014,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.1" +version = "0.26.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" +checksum = "841c67bff177718f1d4dfefde8d8f0e78f9b6589319ba88312f567fc5841a958" dependencies = [ "rustls-pki-types", ] @@ -6099,11 +6049,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.6" +version = "0.1.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "winapi", + "windows-sys 0.59.0", ] [[package]] @@ -6119,7 +6069,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ "windows-core", - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -6128,16 +6078,37 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] -name = "windows-sys" -version = "0.45.0" +name = "windows-registry" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" dependencies = [ - "windows-targets 0.42.2", + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", ] [[package]] @@ -6146,7 +6117,7 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - 
"windows-targets 0.48.1", + "windows-targets 0.48.5", ] [[package]] @@ -6155,217 +6126,157 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] name = "windows-targets" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] name = "windows-targets" -version = "0.48.1" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.48.0", - "windows_aarch64_msvc 0.48.0", - "windows_i686_gnu 0.48.0", - "windows_i686_msvc 0.48.0", - "windows_x86_64_gnu 0.48.0", - "windows_x86_64_gnullvm 0.48.0", - "windows_x86_64_msvc 0.48.0", -] - -[[package]] -name = "windows-targets" -version = "0.52.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" -dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" - 
-[[package]] -name = "windows_aarch64_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] -name = "windows_i686_gnu" -version = "0.52.4" +name = "windows_i686_gnullvm" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +checksum = 
"ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.5.40" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" dependencies = [ "memchr", ] -[[package]] -name = "winnow" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1" -dependencies = [ - "memchr", -] - -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "wiremock" -version = "0.6.0" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec874e1eef0df2dcac546057fe5e29186f09c378181cd7b635b4b7bcc98e9d81" +checksum = "7fff469918e7ca034884c7fd8f93fe27bacb7fcb599fd879df6c7b429a29b646" dependencies = [ "assert-json-diff", "async-trait", - "base64 0.21.7", + "base64 0.22.1", "deadpool", "futures", "http 1.1.0", @@ -6443,9 +6354,9 @@ dependencies = [ [[package]] name = "yoke" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"65e71b2e4f287f467794c671e2b8f8a5f3716b3c829079a1c44740148eff07e4" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" dependencies = [ "serde", "stable_deref_trait", @@ -6455,54 +6366,55 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e6936f0cce458098a201c245a11bef556c6a0181129c7034d10d76d1ec3a2b8" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", "synstructure", ] [[package]] name = "zerocopy" -version = "0.7.32" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ + "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.32" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] name = "zerofrom" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.60", + "syn 2.0.85", "synstructure", ] @@ -6523,7 +6435,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.85", ] [[package]] @@ -6543,9 +6455,9 @@ dependencies = [ [[package]] name = "zip" -version = "2.1.3" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775a2b471036342aa69bc5a602bc889cb0a06cda00477d0c69566757d5553d39" +checksum = "dc5e4288ea4057ae23afc69a4472434a87a2495cafce6632fd1c4ec9f5cf3494" dependencies = [ "aes", "arbitrary", @@ -6595,18 +6507,18 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "7.2.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 14bbcfe53..fdf213a6b 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -33,13 +33,11 @@ use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::update::new::indexer::{ self, retrieve_or_guess_primary_key, UpdateByFunction, }; -use meilisearch_types::milli::update::{ - IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings, -}; +use meilisearch_types::milli::update::{IndexDocumentsMethod, Settings as MilliSettings}; use meilisearch_types::milli::vector::parsed_vectors::{ ExplicitVectors, 
VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME, }; -use meilisearch_types::milli::{self, Filter, Object}; +use meilisearch_types::milli::{self, Filter}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; @@ -50,7 +48,7 @@ use uuid::Uuid; use crate::autobatcher::{self, BatchKind}; use crate::utils::{self, swap_index_uid_in_task}; -use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId}; +use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId}; /// Represents a combination of tasks that can all be processed at the same time. /// diff --git a/meili-snap/Cargo.toml b/meili-snap/Cargo.toml index e86feabd9..6c68e563c 100644 --- a/meili-snap/Cargo.toml +++ b/meili-snap/Cargo.toml @@ -11,6 +11,6 @@ edition.workspace = true license.workspace = true [dependencies] -insta = { version = "^1.39.0", features = ["json", "redactions"] } +insta = { version = "=1.39.0", features = ["json", "redactions"] } md5 = "0.7.0" once_cell = "1.19" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 46633bdec..df463c902 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -94,10 +94,12 @@ tracing = "0.1.40" ureq = { version = "2.10.0", features = ["json"] } url = "2.5.2" rayon-par-bridge = "0.1.0" -hashbrown = "0.14.5" +hashbrown = "0.15.0" raw-collections = { git = "https://github.com/dureuill/raw-collections.git", version = "0.1.0" } bumpalo = "3.16.0" thread_local = "1.1.8" +allocator-api2 = "0.2.18" +rustc-hash = "2.0.0" [dev-dependencies] mimalloc = { version = "0.1.43", default-features = false } diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index 220567208..b7da39878 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ 
b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -83,6 +83,8 @@ pub fn writer_into_reader( grenad::Reader::new(BufReader::new(file)).map_err(Into::into) } +/// # Safety +/// We use memory mapping inside. So, according to the Rust community, it's unsafe. pub unsafe fn as_cloneable_grenad( reader: &grenad::Reader>, ) -> Result> { diff --git a/milli/src/update/new/channel.rs b/milli/src/update/new/channel.rs index d63180ba1..af6e2215c 100644 --- a/milli/src/update/new/channel.rs +++ b/milli/src/update/new/channel.rs @@ -1,42 +1,33 @@ -use std::fs::File; use std::marker::PhantomData; +use std::sync::atomic::{AtomicUsize, Ordering}; use crossbeam_channel::{IntoIter, Receiver, SendError, Sender}; -use grenad::Merger; use heed::types::Bytes; -use memmap2::Mmap; use roaring::RoaringBitmap; use super::extract::FacetKind; use super::StdResult; -use crate::index::main_key::{DOCUMENTS_IDS_KEY, WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; +use crate::index::main_key::DOCUMENTS_IDS_KEY; use crate::update::new::KvReaderFieldId; -use crate::update::MergeDeladdCboRoaringBitmaps; use crate::{DocumentId, Index}; /// The capacity of the channel is currently in number of messages. -pub fn merger_writer_channel(cap: usize) -> (MergerSender, WriterReceiver) { +pub fn extractor_writer_channel(cap: usize) -> (ExtractorSender, WriterReceiver) { let (sender, receiver) = crossbeam_channel::bounded(cap); ( - MergerSender { + ExtractorSender { sender, send_count: Default::default(), writer_contentious_count: Default::default(), - merger_contentious_count: Default::default(), + extractor_contentious_count: Default::default(), }, WriterReceiver(receiver), ) } -/// The capacity of the channel is currently in number of messages. 
-pub fn extractors_merger_channels(cap: usize) -> (ExtractorSender, MergerReceiver) { - let (sender, receiver) = crossbeam_channel::bounded(cap); - (ExtractorSender(sender), MergerReceiver(receiver)) -} - -pub enum KeyValueEntry { - SmallInMemory { key_length: usize, data: Box<[u8]> }, - LargeOnDisk { key: Box<[u8]>, value: Mmap }, +pub struct KeyValueEntry { + pub key_length: usize, + pub data: Box<[u8]>, } impl KeyValueEntry { @@ -44,32 +35,22 @@ impl KeyValueEntry { let mut data = Vec::with_capacity(key.len() + value.len()); data.extend_from_slice(key); data.extend_from_slice(value); - KeyValueEntry::SmallInMemory { key_length: key.len(), data: data.into_boxed_slice() } + KeyValueEntry { key_length: key.len(), data: data.into_boxed_slice() } } pub fn from_small_key_bitmap(key: &[u8], bitmap: RoaringBitmap) -> Self { let mut data = Vec::with_capacity(key.len() + bitmap.serialized_size()); data.extend_from_slice(key); bitmap.serialize_into(&mut data).unwrap(); - KeyValueEntry::SmallInMemory { key_length: key.len(), data: data.into_boxed_slice() } - } - - pub fn from_large_key_value(key: &[u8], value: Mmap) -> Self { - KeyValueEntry::LargeOnDisk { key: key.to_vec().into_boxed_slice(), value } + KeyValueEntry { key_length: key.len(), data: data.into_boxed_slice() } } pub fn key(&self) -> &[u8] { - match self { - KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[..*key_length], - KeyValueEntry::LargeOnDisk { key, value: _ } => key.as_ref(), - } + &self.data[..self.key_length] } pub fn value(&self) -> &[u8] { - match self { - KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[*key_length..], - KeyValueEntry::LargeOnDisk { key: _, value } => value.as_ref(), - } + &self.data[self.key_length..] 
} } @@ -92,37 +73,6 @@ pub enum EntryOperation { Write(KeyValueEntry), } -pub struct DocumentEntry { - docid: DocumentId, - content: Box<[u8]>, -} - -impl DocumentEntry { - pub fn new_uncompressed(docid: DocumentId, content: Box) -> Self { - DocumentEntry { docid, content: content.into() } - } - - pub fn new_compressed(docid: DocumentId, content: Box<[u8]>) -> Self { - DocumentEntry { docid, content } - } - - pub fn key(&self) -> [u8; 4] { - self.docid.to_be_bytes() - } - - pub fn content(&self) -> &[u8] { - &self.content - } -} - -pub struct DocumentDeletionEntry(DocumentId); - -impl DocumentDeletionEntry { - pub fn key(&self) -> [u8; 4] { - self.0.to_be_bytes() - } -} - pub struct WriterOperation { database: Database, entry: EntryOperation, @@ -206,34 +156,32 @@ impl IntoIterator for WriterReceiver { } } -pub struct MergerSender { +pub struct ExtractorSender { sender: Sender, - /// The number of message we send in total in the channel. - send_count: std::cell::Cell, + /// The number of message we sent in total in the channel. + send_count: AtomicUsize, /// The number of times we sent something in a channel that was full. - writer_contentious_count: std::cell::Cell, + writer_contentious_count: AtomicUsize, /// The number of times we sent something in a channel that was empty. 
- merger_contentious_count: std::cell::Cell, + extractor_contentious_count: AtomicUsize, } -impl Drop for MergerSender { +impl Drop for ExtractorSender { fn drop(&mut self) { + let send_count = *self.send_count.get_mut(); + let writer_contentious_count = *self.writer_contentious_count.get_mut(); + let extractor_contentious_count = *self.extractor_contentious_count.get_mut(); eprintln!( - "Merger channel stats: {} sends, {} writer contentions ({}%), {} merger contentions ({}%)", - self.send_count.get(), - self.writer_contentious_count.get(), - (self.writer_contentious_count.get() as f32 / self.send_count.get() as f32) * 100.0, - self.merger_contentious_count.get(), - (self.merger_contentious_count.get() as f32 / self.send_count.get() as f32) * 100.0 + "Extractor channel stats: {send_count} sends, \ + {writer_contentious_count} writer contentions ({}%), \ + {extractor_contentious_count} extractor contentions ({}%)", + (writer_contentious_count as f32 / send_count as f32) * 100.0, + (extractor_contentious_count as f32 / send_count as f32) * 100.0 ) } } -impl MergerSender { - pub fn main(&self) -> MainSender<'_> { - MainSender(self) - } - +impl ExtractorSender { pub fn docids(&self) -> WordDocidsSender<'_, D> { WordDocidsSender { sender: self, _marker: PhantomData } } @@ -263,12 +211,12 @@ impl MergerSender { fn send(&self, op: WriterOperation) -> StdResult<(), SendError<()>> { if self.sender.is_full() { - self.writer_contentious_count.set(self.writer_contentious_count.get() + 1); + self.writer_contentious_count.fetch_add(1, Ordering::SeqCst); } if self.sender.is_empty() { - self.merger_contentious_count.set(self.merger_contentious_count.get() + 1); + self.extractor_contentious_count.fetch_add(1, Ordering::SeqCst); } - self.send_count.set(self.send_count.get() + 1); + self.send_count.fetch_add(1, Ordering::SeqCst); match self.sender.send(op) { Ok(()) => Ok(()), Err(SendError(_)) => Err(SendError(())), @@ -276,129 +224,48 @@ impl MergerSender { } } -pub struct 
MainSender<'a>(&'a MergerSender); - -impl MainSender<'_> { - pub fn write_words_fst(&self, value: Mmap) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Write(KeyValueEntry::from_large_key_value( - WORDS_FST_KEY.as_bytes(), - value, - )); - match self.0.send(WriterOperation { database: Database::Main, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } - - pub fn write_words_prefixes_fst(&self, value: Mmap) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Write(KeyValueEntry::from_large_key_value( - WORDS_PREFIXES_FST_KEY.as_bytes(), - value, - )); - match self.0.send(WriterOperation { database: Database::Main, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } - - pub fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Delete(KeyEntry::from_key(key)); - match self.0.send(WriterOperation { database: Database::Main, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } -} - pub enum ExactWordDocids {} pub enum FidWordCountDocids {} pub enum WordDocids {} pub enum WordFidDocids {} pub enum WordPairProximityDocids {} pub enum WordPositionDocids {} -pub enum FacetDocids {} pub trait DatabaseType { const DATABASE: Database; } -pub trait MergerOperationType { - fn new_merger_operation(merger: Merger) -> MergerOperation; -} - impl DatabaseType for ExactWordDocids { const DATABASE: Database = Database::ExactWordDocids; } -impl MergerOperationType for ExactWordDocids { - fn new_merger_operation(merger: Merger) -> MergerOperation { - MergerOperation::ExactWordDocidsMerger(merger) - } -} - impl DatabaseType for FidWordCountDocids { const DATABASE: Database = Database::FidWordCountDocids; } -impl MergerOperationType for FidWordCountDocids { - fn new_merger_operation(merger: Merger) -> MergerOperation { - MergerOperation::FidWordCountDocidsMerger(merger) - } -} - impl DatabaseType for WordDocids { const DATABASE: 
Database = Database::WordDocids; } -impl MergerOperationType for WordDocids { - fn new_merger_operation(merger: Merger) -> MergerOperation { - MergerOperation::WordDocidsMerger(merger) - } -} - impl DatabaseType for WordFidDocids { const DATABASE: Database = Database::WordFidDocids; } -impl MergerOperationType for WordFidDocids { - fn new_merger_operation(merger: Merger) -> MergerOperation { - MergerOperation::WordFidDocidsMerger(merger) - } -} - impl DatabaseType for WordPairProximityDocids { const DATABASE: Database = Database::WordPairProximityDocids; } -impl MergerOperationType for WordPairProximityDocids { - fn new_merger_operation(merger: Merger) -> MergerOperation { - MergerOperation::WordPairProximityDocidsMerger(merger) - } -} - impl DatabaseType for WordPositionDocids { const DATABASE: Database = Database::WordPositionDocids; } -impl MergerOperationType for WordPositionDocids { - fn new_merger_operation(merger: Merger) -> MergerOperation { - MergerOperation::WordPositionDocidsMerger(merger) - } -} - -impl MergerOperationType for FacetDocids { - fn new_merger_operation(merger: Merger) -> MergerOperation { - MergerOperation::FacetDocidsMerger(merger) - } -} - pub trait DocidsSender { fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>>; fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>>; } pub struct WordDocidsSender<'a, D> { - sender: &'a MergerSender, + sender: &'a ExtractorSender, _marker: PhantomData, } @@ -421,7 +288,7 @@ impl DocidsSender for WordDocidsSender<'_, D> { } pub struct FacetDocidsSender<'a> { - sender: &'a MergerSender, + sender: &'a ExtractorSender, } impl DocidsSender for FacetDocidsSender<'_> { @@ -456,7 +323,7 @@ impl DocidsSender for FacetDocidsSender<'_> { } pub struct FacetSearchableSender<'a> { - sender: &'a MergerSender, + sender: &'a ExtractorSender, } impl FacetSearchableSender<'_> { @@ -481,25 +348,9 @@ impl FacetSearchableSender<'_> { Err(SendError(_)) => Err(SendError(())), } } - - pub fn 
write_fst(&self, key: &[u8], value: Mmap) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Write(KeyValueEntry::from_large_key_value(key, value)); - match self.sender.send(WriterOperation { database: Database::FacetIdStringFst, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } - - pub fn delete_fst(&self, key: &[u8]) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Delete(KeyEntry::from_key(key)); - match self.sender.send(WriterOperation { database: Database::FacetIdStringFst, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } } -pub struct DocumentsSender<'a>(&'a MergerSender); +pub struct DocumentsSender<'a>(&'a ExtractorSender); impl DocumentsSender<'_> { /// TODO do that efficiently @@ -542,86 +393,3 @@ impl DocumentsSender<'_> { } } } - -pub enum MergerOperation { - ExactWordDocidsMerger(Merger), - FidWordCountDocidsMerger(Merger), - WordDocidsMerger(Merger), - WordFidDocidsMerger(Merger), - WordPairProximityDocidsMerger(Merger), - WordPositionDocidsMerger(Merger), - FacetDocidsMerger(Merger), - DeleteDocument { docid: DocumentId, external_id: String }, - InsertDocument { docid: DocumentId, external_id: String, document: Box }, - FinishedDocument, -} - -pub struct MergerReceiver(Receiver); - -impl IntoIterator for MergerReceiver { - type Item = MergerOperation; - type IntoIter = IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -pub struct ExtractorSender(Sender); - -impl ExtractorSender { - pub fn document_sender(&self) -> DocumentSender<'_> { - DocumentSender(Some(&self.0)) - } - - pub fn send_searchable( - &self, - merger: Merger, - ) -> StdResult<(), SendError<()>> { - match self.0.send(D::new_merger_operation(merger)) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } -} - -pub struct DocumentSender<'a>(Option<&'a Sender>); - -impl DocumentSender<'_> { - pub fn insert( - &self, - docid: DocumentId, - 
external_id: String, - document: Box, - ) -> StdResult<(), SendError<()>> { - let sender = self.0.unwrap(); - match sender.send(MergerOperation::InsertDocument { docid, external_id, document }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } - - pub fn delete(&self, docid: DocumentId, external_id: String) -> StdResult<(), SendError<()>> { - let sender = self.0.unwrap(); - match sender.send(MergerOperation::DeleteDocument { docid, external_id }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } - - pub fn finish(mut self) -> StdResult<(), SendError<()>> { - let sender = self.0.take().unwrap(); - match sender.send(MergerOperation::FinishedDocument) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } -} - -impl Drop for DocumentSender<'_> { - fn drop(&mut self) { - if let Some(sender) = self.0.take() { - let _ = sender.send(MergerOperation::FinishedDocument); - } - } -} diff --git a/milli/src/update/new/extract/cache.rs b/milli/src/update/new/extract/cache.rs index 2fbe427f3..a366435d8 100644 --- a/milli/src/update/new/extract/cache.rs +++ b/milli/src/update/new/extract/cache.rs @@ -1,299 +1,611 @@ -use std::fmt::Write as _; -use std::mem; -use std::num::NonZeroUsize; +//! # How the Merge Algorithm works +//! +//! Each extractor create #Threads caches and balances the entries +//! based on the hash of the keys. To do that we can use the +//! hashbrown::hash_map::RawEntryBuilderMut::from_key_hashed_nocheck. +//! This way we can compute the hash on our own, decide on the cache to +//! target, and insert it into the right HashMap. +//! +//! #Thread -> caches +//! t1 -> [t1c1, t1c2, t1c3] +//! t2 -> [t2c1, t2c2, t2c3] +//! t3 -> [t3c1, t3c2, t3c3] +//! +//! When the extractors are done filling the caches, we want to merge +//! the content of all the caches. We do a transpose and each thread is +//! assigned the associated cache. By doing that we know that every key +//! 
is put in a known cache and will collide with keys in the other +//! caches of the other threads. +//! +//! #Thread -> caches +//! t1 -> [t1c1, t2c1, t3c1] +//! t2 -> [t1c2, t2c2, t3c2] +//! t3 -> [t1c3, t2c3, t3c3] +//! +//! When we encounter a miss in the other caches we must still try +//! to find it in the spilled entries. This is the reason why we use +//! a grenad sorter/reader so that we can seek "efficiently" for a key. +//! +//! ## More Detailed Algorithm +//! +//! Each sub-cache has an in-memory HashMap and some spilled +//! lexicographically ordered entries on disk (grenad). We first iterate +//! over the spilled entries of all the caches at once by using a merge +//! join algorithm. This algorithm will merge the entries by using its +//! merge function. +//! +//! Every time a merged entry is emitted by the merge join algorithm we also +//! fetch the value from the other in-memory caches (HashMaps) to finish +//! the merge. Every time we retrieve an entry from the in-memory caches +//! we mark it with a tombstone for later. +//! +//! Once we are done with the spilled entries we iterate over the in-memory +//! HashMaps. We iterate over the first one, retrieve the content from the +//! other ones and mark them with a tombstone again. We also make sure +//! to ignore the dead (tombstoned) ones. +//! +//! ## Memory Control +//! +//! We can detect that there is no more memory available when the +//! bump allocator reaches a threshold. When this is the case we +//! freeze the cache. There is one bump allocator per thread and the +//! memory must be well balanced as we manage one type of extraction +//! at a time with well-balanced documents. +//! +//! It means that the unknown new keys added to the +//! cache are directly spilled to disk: basically a key followed by a +//! del/add bitmap. For the known keys we can keep modifying them in +//! the materialized version in the cache: update the del/add bitmaps. +//!
For now we can use a grenad sorter for spilling even thought I think +//! it's not the most efficient way (too many files open, sorting entries). -use grenad::{MergeFunction, Sorter}; -use roaring::bitmap::Statistics; +use std::cmp::Ordering; +use std::collections::binary_heap::PeekMut; +use std::collections::BinaryHeap; +use std::fs::File; +use std::hash::BuildHasher; +use std::io::BufReader; +use std::{io, iter, mem}; + +use bumpalo::Bump; +use grenad::ReaderCursor; +use hashbrown::hash_map::RawEntryMut; +use hashbrown::HashMap; +use raw_collections::map::FrozenMap; use roaring::RoaringBitmap; -use smallvec::SmallVec; +use rustc_hash::FxBuildHasher; -use super::lru::Lru; use crate::update::del_add::{DelAdd, KvWriterDelAdd}; -use crate::CboRoaringBitmapCodec; +use crate::update::new::indexer::document_changes::MostlySend; +use crate::update::new::KvReaderDelAdd; +use crate::update::MergeDeladdCboRoaringBitmaps; +use crate::{CboRoaringBitmapCodec, Result}; -const KEY_SIZE: usize = 12; +/// A cache that stores bytes keys associated to CboDelAddRoaringBitmaps. +/// +/// Internally balances the content over `N` buckets for future merging. 
+pub struct BalancedCaches<'extractor> { + hasher: FxBuildHasher, + alloc: &'extractor Bump, + max_memory: Option, + caches: InnerCaches<'extractor>, +} -#[derive(Debug)] -pub struct CboCachedSorter { - cache: Lru, DelAddRoaringBitmap>, - sorter: Sorter, +enum InnerCaches<'extractor> { + Normal(NormalCaches<'extractor>), + Spilling(SpillingCaches<'extractor>), +} + +impl<'extractor> BalancedCaches<'extractor> { + pub fn new_in(buckets: usize, max_memory: Option, alloc: &'extractor Bump) -> Self { + Self { + hasher: FxBuildHasher, + max_memory, + caches: InnerCaches::Normal(NormalCaches { + caches: iter::repeat_with(|| HashMap::with_hasher_in(FxBuildHasher, alloc)) + .take(buckets) + .collect(), + }), + alloc, + } + } + + fn buckets(&self) -> usize { + match &self.caches { + InnerCaches::Normal(caches) => caches.caches.len(), + InnerCaches::Spilling(caches) => caches.caches.len(), + } + } + + pub fn insert_del_u32(&mut self, key: &[u8], n: u32) -> Result<()> { + if self.max_memory.map_or(false, |mm| self.alloc.allocated_bytes() >= mm) { + self.start_spilling()?; + } + + let buckets = self.buckets(); + match &mut self.caches { + InnerCaches::Normal(normal) => { + normal.insert_del_u32(&self.hasher, self.alloc, buckets, key, n); + Ok(()) + } + InnerCaches::Spilling(spilling) => { + spilling.insert_del_u32(&self.hasher, buckets, key, n) + } + } + } + + pub fn insert_add_u32(&mut self, key: &[u8], n: u32) -> Result<()> { + if self.max_memory.map_or(false, |mm| self.alloc.allocated_bytes() >= mm) { + self.start_spilling()?; + } + + let buckets = self.buckets(); + match &mut self.caches { + InnerCaches::Normal(normal) => { + normal.insert_add_u32(&self.hasher, self.alloc, buckets, key, n); + Ok(()) + } + InnerCaches::Spilling(spilling) => { + spilling.insert_add_u32(&self.hasher, buckets, key, n) + } + } + } + + /// Make sure the cache is no longer allocating data + /// and writes every new and unknow entry to disk. 
+ fn start_spilling(&mut self) -> Result<()> { + let BalancedCaches { hasher: _, alloc, max_memory: _, caches } = self; + + if let InnerCaches::Normal(normal_caches) = caches { + eprintln!( + "We are spilling after we allocated {} bytes on thread #{}", + alloc.allocated_bytes(), + rayon::current_thread_index().unwrap_or(0) + ); + + let allocated: usize = normal_caches.caches.iter().map(|m| m.allocation_size()).sum(); + eprintln!("The last allocated HasMap took {allocated} bytes"); + + let dummy = NormalCaches { caches: Vec::new() }; + let NormalCaches { caches: cache_maps } = mem::replace(normal_caches, dummy); + *caches = InnerCaches::Spilling(SpillingCaches::from_cache_maps(cache_maps)); + } + + Ok(()) + } + + pub fn freeze(&mut self) -> Result>> { + match &mut self.caches { + InnerCaches::Normal(NormalCaches { caches }) => caches + .iter_mut() + .enumerate() + .map(|(bucket, map)| { + Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled: Vec::new() }) + }) + .collect(), + InnerCaches::Spilling(SpillingCaches { caches, spilled_entries, .. }) => caches + .iter_mut() + .zip(mem::take(spilled_entries)) + .enumerate() + .map(|(bucket, (map, sorter))| { + let spilled = sorter + .into_reader_cursors()? 
+ .into_iter() + .map(ReaderCursor::into_inner) + .map(BufReader::new) + .map(|bufreader| grenad::Reader::new(bufreader).map_err(Into::into)) + .collect::>()?; + Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled }) + }) + .collect(), + } + } +} + +unsafe impl MostlySend for BalancedCaches<'_> {} + +struct NormalCaches<'extractor> { + caches: Vec>, +} + +impl<'extractor> NormalCaches<'extractor> { + pub fn insert_del_u32( + &mut self, + hasher: &FxBuildHasher, + alloc: &'extractor Bump, + buckets: usize, + key: &[u8], + n: u32, + ) { + let hash = hasher.hash_one(key); + let bucket = compute_bucket_from_hash(buckets, hash); + + match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) { + RawEntryMut::Occupied(mut entry) => { + entry.get_mut().del.get_or_insert_with(RoaringBitmap::default).insert(n); + } + RawEntryMut::Vacant(entry) => { + entry.insert_hashed_nocheck( + hash, + alloc.alloc_slice_copy(key), + DelAddRoaringBitmap::new_del_u32(n), + ); + } + } + } + + pub fn insert_add_u32( + &mut self, + hasher: &FxBuildHasher, + alloc: &'extractor Bump, + buckets: usize, + key: &[u8], + n: u32, + ) { + let hash = hasher.hash_one(key); + let bucket = compute_bucket_from_hash(buckets, hash); + match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) { + RawEntryMut::Occupied(mut entry) => { + entry.get_mut().add.get_or_insert_with(RoaringBitmap::default).insert(n); + } + RawEntryMut::Vacant(entry) => { + entry.insert_hashed_nocheck( + hash, + alloc.alloc_slice_copy(key), + DelAddRoaringBitmap::new_add_u32(n), + ); + } + } + } +} + +struct SpillingCaches<'extractor> { + caches: Vec>, + spilled_entries: Vec>, deladd_buffer: Vec, cbo_buffer: Vec, - total_insertions: usize, - fitted_in_key: usize, } -impl CboCachedSorter { - pub fn new(cap: NonZeroUsize, sorter: Sorter) -> Self { - CboCachedSorter { - cache: Lru::new(cap), - sorter, +impl<'extractor> SpillingCaches<'extractor> { + fn from_cache_maps( + caches: Vec< + 
HashMap<&'extractor [u8], DelAddRoaringBitmap, FxBuildHasher, &'extractor Bump>, + >, + ) -> SpillingCaches<'extractor> { + SpillingCaches { + spilled_entries: iter::repeat_with(|| { + let mut builder = grenad::SorterBuilder::new(MergeDeladdCboRoaringBitmaps); + builder.dump_threshold(0); + builder.allow_realloc(false); + builder.build() + }) + .take(caches.len()) + .collect(), + caches, deladd_buffer: Vec::new(), cbo_buffer: Vec::new(), - total_insertions: 0, - fitted_in_key: 0, } } -} -impl CboCachedSorter { - pub fn insert_del_u32(&mut self, key: &[u8], n: u32) -> grenad::Result<(), MF::Error> { - match self.cache.get_mut(key) { - Some(DelAddRoaringBitmap { del, add: _ }) => { - del.get_or_insert_with(RoaringBitmap::default).insert(n); - } - None => { - self.total_insertions += 1; - self.fitted_in_key += (key.len() <= KEY_SIZE) as usize; - let value = DelAddRoaringBitmap::new_del_u32(n); - if let Some((key, deladd)) = self.cache.push(key.into(), value) { - self.write_entry(key, deladd)?; - } - } - } - - Ok(()) - } - - pub fn insert_del( + pub fn insert_del_u32( &mut self, + hasher: &FxBuildHasher, + buckets: usize, key: &[u8], - bitmap: RoaringBitmap, - ) -> grenad::Result<(), MF::Error> { - match self.cache.get_mut(key) { - Some(DelAddRoaringBitmap { del, add: _ }) => { - *del.get_or_insert_with(RoaringBitmap::default) |= bitmap; + n: u32, + ) -> Result<()> { + let hash = hasher.hash_one(key); + let bucket = compute_bucket_from_hash(buckets, hash); + match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) { + RawEntryMut::Occupied(mut entry) => { + entry.get_mut().del.get_or_insert_with(RoaringBitmap::default).insert(n); + Ok(()) } - None => { - self.total_insertions += 1; - self.fitted_in_key += (key.len() <= KEY_SIZE) as usize; - let value = DelAddRoaringBitmap::new_del(bitmap); - if let Some((key, deladd)) = self.cache.push(key.into(), value) { - self.write_entry(key, deladd)?; - } + RawEntryMut::Vacant(_entry) => { + let deladd = 
DelAddRoaringBitmap::new_del_u32(n); + spill_entry_to_sorter( + &mut self.spilled_entries[bucket], + &mut self.deladd_buffer, + &mut self.cbo_buffer, + key, + deladd, + ) } } - - Ok(()) } - pub fn insert_add_u32(&mut self, key: &[u8], n: u32) -> grenad::Result<(), MF::Error> { - match self.cache.get_mut(key) { - Some(DelAddRoaringBitmap { del: _, add }) => { - add.get_or_insert_with(RoaringBitmap::default).insert(n); - } - None => { - self.total_insertions += 1; - self.fitted_in_key += (key.len() <= KEY_SIZE) as usize; - let value = DelAddRoaringBitmap::new_add_u32(n); - if let Some((key, deladd)) = self.cache.push(key.into(), value) { - self.write_entry(key, deladd)?; - } - } - } - - Ok(()) - } - - pub fn insert_add( + pub fn insert_add_u32( &mut self, + hasher: &FxBuildHasher, + buckets: usize, key: &[u8], - bitmap: RoaringBitmap, - ) -> grenad::Result<(), MF::Error> { - match self.cache.get_mut(key) { - Some(DelAddRoaringBitmap { del: _, add }) => { - *add.get_or_insert_with(RoaringBitmap::default) |= bitmap; + n: u32, + ) -> Result<()> { + let hash = hasher.hash_one(key); + let bucket = compute_bucket_from_hash(buckets, hash); + match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) { + RawEntryMut::Occupied(mut entry) => { + entry.get_mut().add.get_or_insert_with(RoaringBitmap::default).insert(n); + Ok(()) } - None => { - self.total_insertions += 1; - self.fitted_in_key += (key.len() <= KEY_SIZE) as usize; - let value = DelAddRoaringBitmap::new_add(bitmap); - if let Some((key, deladd)) = self.cache.push(key.into(), value) { - self.write_entry(key, deladd)?; + RawEntryMut::Vacant(_entry) => { + let deladd = DelAddRoaringBitmap::new_add_u32(n); + spill_entry_to_sorter( + &mut self.spilled_entries[bucket], + &mut self.deladd_buffer, + &mut self.cbo_buffer, + key, + deladd, + ) + } + } + } +} + +#[inline] +fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize { + hash as usize % buckets +} + +fn spill_entry_to_sorter( + spilled_entries: 
&mut grenad::Sorter, + deladd_buffer: &mut Vec, + cbo_buffer: &mut Vec, + key: &[u8], + deladd: DelAddRoaringBitmap, +) -> Result<()> { + deladd_buffer.clear(); + let mut value_writer = KvWriterDelAdd::new(deladd_buffer); + + match deladd { + DelAddRoaringBitmap { del: Some(del), add: None } => { + cbo_buffer.clear(); + CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer); + value_writer.insert(DelAdd::Deletion, &cbo_buffer)?; + } + DelAddRoaringBitmap { del: None, add: Some(add) } => { + cbo_buffer.clear(); + CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer); + value_writer.insert(DelAdd::Addition, &cbo_buffer)?; + } + DelAddRoaringBitmap { del: Some(del), add: Some(add) } => { + cbo_buffer.clear(); + CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer); + value_writer.insert(DelAdd::Deletion, &cbo_buffer)?; + + cbo_buffer.clear(); + CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer); + value_writer.insert(DelAdd::Addition, &cbo_buffer)?; + } + DelAddRoaringBitmap { del: None, add: None } => return Ok(()), + } + + let bytes = value_writer.into_inner().unwrap(); + spilled_entries.insert(key, bytes).map_err(Into::into) +} + +pub struct FrozenCache<'a, 'extractor> { + bucket: usize, + cache: FrozenMap<'a, 'extractor, &'extractor [u8], DelAddRoaringBitmap, FxBuildHasher>, + spilled: Vec>>, +} + +pub fn transpose_and_freeze_caches<'a, 'extractor>( + caches: &'a mut [BalancedCaches<'extractor>], +) -> Result>>> { + let width = caches.first().map(BalancedCaches::buckets).unwrap_or(0); + let mut bucket_caches: Vec<_> = iter::repeat_with(Vec::new).take(width).collect(); + + for thread_cache in caches { + for frozen in thread_cache.freeze()? { + bucket_caches[frozen.bucket].push(frozen); + } + } + + Ok(bucket_caches) +} + +/// Merges the caches that must be all associated to the same bucket. +/// +/// # Panics +/// +/// - If the bucket IDs in these frozen caches are not exactly the same. 
+pub fn merge_caches(frozen: Vec, mut f: F) -> Result<()> +where + F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>, +{ + let mut maps = Vec::new(); + let mut readers = Vec::new(); + let mut current_bucket = None; + for FrozenCache { bucket, cache, ref mut spilled } in frozen { + assert_eq!(*current_bucket.get_or_insert(bucket), bucket); + maps.push(cache); + readers.append(spilled); + } + + // First manage the spilled entries by looking into the HashMaps, + // merge them and mark them as dummy. + let mut heap = BinaryHeap::new(); + for (source_index, source) in readers.into_iter().enumerate() { + let mut cursor = source.into_cursor()?; + if cursor.move_on_next()?.is_some() { + heap.push(Entry { cursor, source_index }); + } + } + + loop { + let mut first_entry = match heap.pop() { + Some(entry) => entry, + None => break, + }; + + let (first_key, first_value) = match first_entry.cursor.current() { + Some((key, value)) => (key, value), + None => break, + }; + + let mut output = DelAddRoaringBitmap::from_bytes(first_value)?; + while let Some(mut entry) = heap.peek_mut() { + if let Some((key, _value)) = entry.cursor.current() { + if first_key == key { + let new = DelAddRoaringBitmap::from_bytes(first_value)?; + output = output.merge(new); + // When we are done we the current value of this entry move make + // it move forward and let the heap reorganize itself (on drop) + if entry.cursor.move_on_next()?.is_none() { + PeekMut::pop(entry); + } + } else { + break; } } } - Ok(()) - } - - pub fn insert_del_add_u32(&mut self, key: &[u8], n: u32) -> grenad::Result<(), MF::Error> { - match self.cache.get_mut(key) { - Some(DelAddRoaringBitmap { del, add }) => { - del.get_or_insert_with(RoaringBitmap::default).insert(n); - add.get_or_insert_with(RoaringBitmap::default).insert(n); - } - None => { - self.total_insertions += 1; - self.fitted_in_key += (key.len() <= KEY_SIZE) as usize; - let value = DelAddRoaringBitmap::new_del_add_u32(n); - if let Some((key, deladd)) = 
self.cache.push(key.into(), value) { - self.write_entry(key, deladd)?; + // Once we merged all of the spilled bitmaps we must also + // fetch the entries from the non-spilled entries (the HashMaps). + for (map_index, map) in maps.iter_mut().enumerate() { + if first_entry.source_index != map_index { + if let Some(new) = map.get_mut(first_key) { + output = output.merge(mem::take(new)); } } } - Ok(()) + // We send the merged entry outside. + (f)(first_key, output)?; + + // Don't forget to put the first entry back into the heap. + if first_entry.cursor.move_on_next()?.is_some() { + heap.push(first_entry) + } } - fn write_entry>( - &mut self, - key: A, - deladd: DelAddRoaringBitmap, - ) -> grenad::Result<(), MF::Error> { - /// TODO we must create a serialization trait to correctly serialize bitmaps - self.deladd_buffer.clear(); - let mut value_writer = KvWriterDelAdd::new(&mut self.deladd_buffer); - match deladd { - DelAddRoaringBitmap { del: Some(del), add: None } => { - self.cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&del, &mut self.cbo_buffer); - value_writer.insert(DelAdd::Deletion, &self.cbo_buffer)?; - } - DelAddRoaringBitmap { del: None, add: Some(add) } => { - self.cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&add, &mut self.cbo_buffer); - value_writer.insert(DelAdd::Addition, &self.cbo_buffer)?; - } - DelAddRoaringBitmap { del: Some(del), add: Some(add) } => { - self.cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&del, &mut self.cbo_buffer); - value_writer.insert(DelAdd::Deletion, &self.cbo_buffer)?; + // Then manage the content on the HashMap entries that weren't taken (mem::take). 
+ while let Some(mut map) = maps.pop() { + for (key, output) in map.iter_mut() { + let mut output = mem::take(output); - self.cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&add, &mut self.cbo_buffer); - value_writer.insert(DelAdd::Addition, &self.cbo_buffer)?; + // Make sure we don't try to work with entries already managed by the spilled + if !output.is_empty() { + for rhs in maps.iter_mut() { + if let Some(new) = rhs.get_mut(key) { + output = output.merge(mem::take(new)); + } + } + + // We send the merged entry outside. + (f)(key, output)?; } - DelAddRoaringBitmap { del: None, add: None } => return Ok(()), } - let bytes = value_writer.into_inner().unwrap(); - self.sorter.insert(key, bytes) } - pub fn direct_insert(&mut self, key: &[u8], val: &[u8]) -> grenad::Result<(), MF::Error> { - self.sorter.insert(key, val) - } + Ok(()) +} - pub fn into_sorter(mut self) -> grenad::Result, MF::Error> { - let mut all_n_containers = Vec::new(); - let mut all_n_array_containers = Vec::new(); - let mut all_n_bitset_containers = Vec::new(); - let mut all_n_values_array_containers = Vec::new(); - let mut all_n_values_bitset_containers = Vec::new(); - let mut all_cardinality = Vec::new(); +struct Entry { + cursor: ReaderCursor, + source_index: usize, +} - let default_arc = Lru::new(NonZeroUsize::MIN); - for (key, deladd) in mem::replace(&mut self.cache, default_arc) { - for bitmap in [&deladd.del, &deladd.add].into_iter().flatten() { - let Statistics { - n_containers, - n_array_containers, - n_bitset_containers, - n_values_array_containers, - n_values_bitset_containers, - cardinality, - .. 
- } = bitmap.statistics(); - all_n_containers.push(n_containers); - all_n_array_containers.push(n_array_containers); - all_n_bitset_containers.push(n_bitset_containers); - all_n_values_array_containers.push(n_values_array_containers); - all_n_values_bitset_containers.push(n_values_bitset_containers); - all_cardinality.push(cardinality as u32); - } - - self.write_entry(key, deladd)?; - } - - let mut output = String::new(); - - for (name, mut slice) in [ - ("n_containers", all_n_containers), - ("n_array_containers", all_n_array_containers), - ("n_bitset_containers", all_n_bitset_containers), - ("n_values_array_containers", all_n_values_array_containers), - ("n_values_bitset_containers", all_n_values_bitset_containers), - ("cardinality", all_cardinality), - ] { - let _ = writeln!(&mut output, "{name} (p100) {:?}", Stats::from_slice(&mut slice)); - // let _ = writeln!(&mut output, "{name} (p99) {:?}", Stats::from_slice_p99(&mut slice)); - } - - let _ = writeln!( - &mut output, - "LruCache stats: {} <= {KEY_SIZE} bytes ({}%) on a total of {} insertions", - self.fitted_in_key, - (self.fitted_in_key as f32 / self.total_insertions as f32) * 100.0, - self.total_insertions, - ); - - eprintln!("{output}"); - - Ok(self.sorter) +impl Ord for Entry { + fn cmp(&self, other: &Entry) -> Ordering { + let skey = self.cursor.current().map(|(k, _)| k); + let okey = other.cursor.current().map(|(k, _)| k); + skey.cmp(&okey).then(self.source_index.cmp(&other.source_index)).reverse() } } -#[derive(Default, Debug)] -struct Stats { - pub len: usize, - pub average: f32, - pub mean: u32, - pub min: u32, - pub max: u32, -} +impl Eq for Entry {} -impl Stats { - fn from_slice(slice: &mut [u32]) -> Stats { - slice.sort_unstable(); - Self::from_sorted_slice(slice) - } - - fn from_slice_p99(slice: &mut [u32]) -> Stats { - slice.sort_unstable(); - let new_len = slice.len() - (slice.len() as f32 / 100.0) as usize; - match slice.get(..new_len) { - Some(slice) => Self::from_sorted_slice(slice), - None 
=> Stats::default(), - } - } - - fn from_sorted_slice(slice: &[u32]) -> Stats { - let sum: f64 = slice.iter().map(|i| *i as f64).sum(); - let average = (sum / slice.len() as f64) as f32; - let mean = *slice.len().checked_div(2).and_then(|middle| slice.get(middle)).unwrap_or(&0); - let min = *slice.first().unwrap_or(&0); - let max = *slice.last().unwrap_or(&0); - Stats { len: slice.len(), average, mean, min, max } +impl PartialEq for Entry { + fn eq(&self, other: &Entry) -> bool { + self.cmp(other) == Ordering::Equal } } -#[derive(Debug, Clone)] +impl PartialOrd for Entry { + fn partial_cmp(&self, other: &Entry) -> Option { + Some(self.cmp(other)) + } +} + +#[derive(Debug, Default, Clone)] pub struct DelAddRoaringBitmap { - pub(crate) del: Option, - pub(crate) add: Option, + pub del: Option, + pub add: Option, } impl DelAddRoaringBitmap { - fn new_del_add_u32(n: u32) -> Self { - DelAddRoaringBitmap { - del: Some(RoaringBitmap::from([n])), - add: Some(RoaringBitmap::from([n])), - } + fn from_bytes(bytes: &[u8]) -> io::Result { + let reader = KvReaderDelAdd::from_slice(bytes); + + let del = match reader.get(DelAdd::Deletion) { + Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?, + None => None, + }; + + let add = match reader.get(DelAdd::Addition) { + Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?, + None => None, + }; + + Ok(DelAddRoaringBitmap { del, add }) } - fn new_del(bitmap: RoaringBitmap) -> Self { - DelAddRoaringBitmap { del: Some(bitmap), add: None } + pub fn empty() -> DelAddRoaringBitmap { + DelAddRoaringBitmap { del: None, add: None } } - fn new_del_u32(n: u32) -> Self { + pub fn is_empty(&self) -> bool { + let DelAddRoaringBitmap { del, add } = self; + del.is_none() && add.is_none() + } + + pub fn insert_del_u32(&mut self, n: u32) { + self.del.get_or_insert_with(RoaringBitmap::new).insert(n); + } + + pub fn insert_add_u32(&mut self, n: u32) { + self.add.get_or_insert_with(RoaringBitmap::new).insert(n); + } 
+ + pub fn new_del_u32(n: u32) -> Self { DelAddRoaringBitmap { del: Some(RoaringBitmap::from([n])), add: None } } - fn new_add(bitmap: RoaringBitmap) -> Self { - DelAddRoaringBitmap { del: None, add: Some(bitmap) } - } - - fn new_add_u32(n: u32) -> Self { + pub fn new_add_u32(n: u32) -> Self { DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) } } + + pub fn merge(self, rhs: DelAddRoaringBitmap) -> DelAddRoaringBitmap { + let DelAddRoaringBitmap { del, add } = self; + let DelAddRoaringBitmap { del: ndel, add: nadd } = rhs; + + let del = match (del, ndel) { + (None, None) => None, + (None, Some(del)) | (Some(del), None) => Some(del), + (Some(del), Some(ndel)) => Some(del | ndel), + }; + + let add = match (add, nadd) { + (None, None) => None, + (None, Some(add)) | (Some(add), None) => Some(add), + (Some(add), Some(nadd)) => Some(add | nadd), + }; + + DelAddRoaringBitmap { del, add } + } + + pub fn apply_to(&self, documents_ids: &mut RoaringBitmap) { + let DelAddRoaringBitmap { del, add } = self; + + if let Some(del) = del { + *documents_ids -= del; + } + + if let Some(add) = add { + *documents_ids |= add; + } + } } diff --git a/milli/src/update/new/extract/documents.rs b/milli/src/update/new/extract/documents.rs new file mode 100644 index 000000000..21fe4d518 --- /dev/null +++ b/milli/src/update/new/extract/documents.rs @@ -0,0 +1,73 @@ +use std::cell::RefCell; + +use bumpalo::Bump; + +use super::DelAddRoaringBitmap; +use crate::update::new::channel::DocumentsSender; +use crate::update::new::document::write_to_obkv; +use crate::update::new::indexer::document_changes::{ + DocumentChangeContext, Extractor, FullySend, RefCellExt as _, +}; +use crate::update::new::DocumentChange; +use crate::Result; + +pub struct DocumentsExtractor<'a> { + documents_sender: &'a DocumentsSender<'a>, +} + +impl<'a> DocumentsExtractor<'a> { + pub fn new(documents_sender: &'a DocumentsSender<'a>) -> Self { + Self { documents_sender } + } +} + +impl<'a, 'extractor> 
Extractor<'extractor> for DocumentsExtractor<'a> { + type Data = FullySend>; + + fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result { + Ok(FullySend(RefCell::new(DelAddRoaringBitmap::empty()))) + } + + fn process( + &self, + change: DocumentChange, + context: &DocumentChangeContext, + ) -> Result<()> { + let mut document_buffer = Vec::new(); + let mut delta_documents_ids = context.data.0.borrow_mut_or_yield(); + + let new_fields_ids_map = context.new_fields_ids_map.borrow_or_yield(); + let new_fields_ids_map = &*new_fields_ids_map; + let new_fields_ids_map = new_fields_ids_map.local_map(); + + let external_docid = change.external_docid().to_owned(); + + // document but we need to create a function that collects and compresses documents. + match change { + DocumentChange::Deletion(deletion) => { + let docid = deletion.docid(); + self.documents_sender.delete(docid, external_docid).unwrap(); + delta_documents_ids.insert_del_u32(docid); + } + /// TODO: change NONE by SOME(vector) when implemented + DocumentChange::Update(update) => { + let docid = update.docid(); + let content = + update.new(&context.txn, context.index, &context.db_fields_ids_map)?; + let content = + write_to_obkv(&content, None, new_fields_ids_map, &mut document_buffer)?; + self.documents_sender.uncompressed(docid, external_docid, content).unwrap(); + } + DocumentChange::Insertion(insertion) => { + let docid = insertion.docid(); + let content = insertion.new(); + let content = + write_to_obkv(&content, None, new_fields_ids_map, &mut document_buffer)?; + self.documents_sender.uncompressed(docid, external_docid, content).unwrap(); + delta_documents_ids.insert_add_u32(docid); + // extracted_dictionary_sender.send(self, dictionary: &[u8]); + } + } + Ok(()) + } +} diff --git a/milli/src/update/new/extract/faceted/extract_facets.rs b/milli/src/update/new/extract/faceted/extract_facets.rs index 9f3ed18d8..6844dd6f2 100644 --- a/milli/src/update/new/extract/faceted/extract_facets.rs +++ 
b/milli/src/update/new/extract/faceted/extract_facets.rs @@ -1,16 +1,12 @@ use std::cell::RefCell; use std::collections::HashSet; -use std::fmt::Debug; -use std::fs::File; use std::ops::DerefMut as _; use bumpalo::Bump; -use grenad::{MergeFunction, Merger}; use heed::RoTxn; -use rayon::iter::{ParallelBridge as _, ParallelIterator as _}; use serde_json::Value; -use super::super::cache::CboCachedSorter; +use super::super::cache::BalancedCaches; use super::facet_document::extract_document_facets; use super::FacetKind; use crate::facet::value_encoding::f64_into_bytes; @@ -20,44 +16,30 @@ use crate::update::new::indexer::document_changes::{ IndexingContext, RefCellExt, ThreadLocal, }; use crate::update::new::DocumentChange; -use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; +use crate::update::GrenadParameters; use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH}; -pub struct FacetedExtractorData<'extractor> { - attributes_to_extract: &'extractor [&'extractor str], +pub struct FacetedExtractorData<'a> { + attributes_to_extract: &'a [&'a str], grenad_parameters: GrenadParameters, - max_memory: Option, + buckets: usize, } -impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'extractor> { - type Data = FullySend>>; +impl<'a, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a> { + type Data = RefCell>; - fn init_data( - &self, - _extractor_alloc: raw_collections::alloc::RefBump<'extractor>, - ) -> Result { - Ok(FullySend(RefCell::new(CboCachedSorter::new( - // TODO use a better value - 1_000_000.try_into().unwrap(), - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - self.grenad_parameters.chunk_compression_type, - self.grenad_parameters.chunk_compression_level, - self.grenad_parameters.max_nb_chunks, - self.max_memory, - // *NOTE*: this must not be set to true: - // 1. we're already using max parallelism in the pool, so it wouldn't help - // 2. 
it creates correctness issues if it causes to yield a borrow-mut wielding task - false, - ), - )))) + fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { + Ok(RefCell::new(BalancedCaches::new_in( + self.buckets, + self.grenad_parameters.max_memory, + extractor_alloc, + ))) } fn process( &self, change: DocumentChange, - context: &crate::update::new::indexer::document_changes::DocumentChangeContext, + context: &DocumentChangeContext, ) -> Result<()> { FacetedDocidsExtractor::extract_document_change(context, self.attributes_to_extract, change) } @@ -67,16 +49,14 @@ pub struct FacetedDocidsExtractor; impl FacetedDocidsExtractor { fn extract_document_change( - context: &DocumentChangeContext< - FullySend>>, - >, + context: &DocumentChangeContext>, attributes_to_extract: &[&str], document_change: DocumentChange, ) -> Result<()> { let index = &context.index; let rtxn = &context.txn; let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); - let mut cached_sorter = context.data.0.borrow_mut_or_yield(); + let mut cached_sorter = context.data.borrow_mut_or_yield(); match document_change { DocumentChange::Deletion(inner) => extract_document_facets( attributes_to_extract, @@ -86,7 +66,7 @@ impl FacetedDocidsExtractor { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), - CboCachedSorter::insert_del_u32, + BalancedCaches::insert_del_u32, inner.docid(), fid, value, @@ -102,7 +82,7 @@ impl FacetedDocidsExtractor { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), - CboCachedSorter::insert_del_u32, + BalancedCaches::insert_del_u32, inner.docid(), fid, value, @@ -118,7 +98,7 @@ impl FacetedDocidsExtractor { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), - CboCachedSorter::insert_add_u32, + BalancedCaches::insert_add_u32, inner.docid(), fid, value, @@ -134,7 +114,7 @@ impl FacetedDocidsExtractor { Self::facet_fn_with_options( &context.doc_alloc, 
cached_sorter.deref_mut(), - CboCachedSorter::insert_add_u32, + BalancedCaches::insert_add_u32, inner.docid(), fid, value, @@ -144,25 +124,20 @@ impl FacetedDocidsExtractor { } } - fn facet_fn_with_options( + fn facet_fn_with_options<'extractor>( doc_alloc: &Bump, - cached_sorter: &mut CboCachedSorter, - cache_fn: impl Fn(&mut CboCachedSorter, &[u8], u32) -> grenad::Result<(), MF::Error>, + cached_sorter: &mut BalancedCaches<'extractor>, + cache_fn: impl Fn(&mut BalancedCaches<'extractor>, &[u8], u32) -> Result<()>, docid: DocumentId, fid: FieldId, value: &Value, - ) -> Result<()> - where - MF: MergeFunction, - MF::Error: Debug, - grenad::Error: Into, - { + ) -> Result<()> { let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc); // Exists // key: fid buffer.push(FacetKind::Exists as u8); buffer.extend_from_slice(&fid.to_be_bytes()); - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into)?; + cache_fn(cached_sorter, &buffer, docid)?; match value { // Number @@ -177,8 +152,7 @@ impl FacetedDocidsExtractor { buffer.push(0); // level 0 buffer.extend_from_slice(&ordered); buffer.extend_from_slice(&n.to_be_bytes()); - - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } else { Ok(()) } @@ -193,7 +167,7 @@ impl FacetedDocidsExtractor { buffer.extend_from_slice(&fid.to_be_bytes()); buffer.push(0); // level 0 buffer.extend_from_slice(truncated.as_bytes()); - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } // Null // key: fid @@ -201,7 +175,7 @@ impl FacetedDocidsExtractor { buffer.clear(); buffer.push(FacetKind::Null as u8); buffer.extend_from_slice(&fid.to_be_bytes()); - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } // Empty // key: fid @@ -209,13 +183,13 @@ impl FacetedDocidsExtractor { buffer.clear(); buffer.push(FacetKind::Empty as u8); buffer.extend_from_slice(&fid.to_be_bytes()); - 
cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } Value::Object(o) if o.is_empty() => { buffer.clear(); buffer.push(FacetKind::Empty as u8); buffer.extend_from_slice(&fid.to_be_bytes()); - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } // Otherwise, do nothing /// TODO: What about Value::Bool? @@ -242,16 +216,13 @@ fn truncate_str(s: &str) -> &str { impl DocidsExtractor for FacetedDocidsExtractor { #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")] - fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>( grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index>, - extractor_allocs: &mut ThreadLocal>>, - ) -> Result> { - let max_memory = grenad_parameters.max_memory_by_thread(); - + extractor_allocs: &'extractor mut ThreadLocal>, + ) -> Result>> { let index = indexing_context.index; - let rtxn = index.read_txn()?; let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?; let attributes_to_extract: Vec<_> = @@ -266,7 +237,7 @@ impl DocidsExtractor for FacetedDocidsExtractor { let extractor = FacetedExtractorData { attributes_to_extract: &attributes_to_extract, grenad_parameters, - max_memory, + buckets: rayon::current_num_threads(), }; for_each_document_change( document_changes, @@ -276,26 +247,7 @@ impl DocidsExtractor for FacetedDocidsExtractor { &datastore, )?; } - { - let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps); - let span = - tracing::trace_span!(target: "indexing::documents::extract", "merger_building"); - let _entered = span.enter(); - let readers: Vec<_> = datastore - .into_iter() - .par_bridge() - .map(|cached_sorter| { - let cached_sorter = cached_sorter.0.into_inner(); - let sorter = 
cached_sorter.into_sorter()?; - sorter.into_reader_cursors() - }) - .collect(); - - for reader in readers { - builder.extend(reader?); - } - Ok(builder.build()) - } + Ok(datastore.into_iter().map(RefCell::into_inner).collect()) } } diff --git a/milli/src/update/new/extract/lru.rs b/milli/src/update/new/extract/lru.rs deleted file mode 100644 index 3eca47cb2..000000000 --- a/milli/src/update/new/extract/lru.rs +++ /dev/null @@ -1,234 +0,0 @@ -use std::borrow::Borrow; -use std::hash::{BuildHasher, Hash}; -use std::iter::repeat_with; -use std::mem; -use std::num::NonZeroUsize; - -use hashbrown::hash_map::{DefaultHashBuilder, Entry}; -use hashbrown::HashMap; - -#[derive(Debug)] -pub struct Lru { - lookup: HashMap, - storage: FixedSizeList>, -} - -impl Lru { - /// Creates a new LRU cache that holds at most `capacity` elements. - pub fn new(capacity: NonZeroUsize) -> Self { - Self { lookup: HashMap::new(), storage: FixedSizeList::new(capacity.get()) } - } -} - -impl Lru { - /// Creates a new LRU cache that holds at most `capacity` elements - /// and uses the provided hash builder to hash keys. - pub fn with_hasher(capacity: NonZeroUsize, hash_builder: S) -> Lru { - Self { - lookup: HashMap::with_hasher(hash_builder), - storage: FixedSizeList::new(capacity.get()), - } - } -} - -impl Lru { - /// Returns a mutable reference to the value of the key in the cache or `None` if it is not present in the cache. - /// - /// Moves the key to the head of the LRU list if it exists. 
- pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> - where - K: Borrow, - Q: Hash + Eq + ?Sized, - { - let idx = *self.lookup.get(key)?; - self.storage.move_front(idx).map(|node| &mut node.value) - } -} - -impl Lru { - pub fn push(&mut self, key: K, value: V) -> Option<(K, V)> { - match self.lookup.entry(key) { - Entry::Occupied(occ) => { - // It's fine to unwrap here because: - // * the entry already exists - let node = self.storage.move_front(*occ.get()).unwrap(); - let old_value = mem::replace(&mut node.value, value); - let old_key = occ.replace_key(); - Some((old_key, old_value)) - } - Entry::Vacant(vac) => { - let key = vac.key().clone(); - if self.storage.is_full() { - // It's fine to unwrap here because: - // * the cache capacity is non zero - // * the cache is full - let idx = self.storage.back_idx(); - let node = self.storage.move_front(idx).unwrap(); - let LruNode { key, value } = mem::replace(node, LruNode { key, value }); - vac.insert(idx); - self.lookup.remove(&key); - Some((key, value)) - } else { - // It's fine to unwrap here because: - // * the cache capacity is non zero - // * the cache is not full - let (idx, _) = self.storage.push_front(LruNode { key, value }).unwrap(); - vac.insert(idx); - None - } - } - } - } -} - -impl IntoIterator for Lru { - type Item = (K, V); - type IntoIter = IntoIter; - - fn into_iter(self) -> Self::IntoIter { - IntoIter { lookup_iter: self.lookup.into_iter(), nodes: self.storage.nodes } - } -} - -pub struct IntoIter { - lookup_iter: hashbrown::hash_map::IntoIter, - nodes: Box<[Option>>]>, -} - -impl Iterator for IntoIter { - type Item = (K, V); - - fn next(&mut self) -> Option { - let (_key, idx) = self.lookup_iter.next()?; - let LruNode { key, value } = self.nodes.get_mut(idx)?.take()?.data; - Some((key, value)) - } -} - -#[derive(Debug)] -struct LruNode { - key: K, - value: V, -} - -#[derive(Debug)] -struct FixedSizeListNode { - prev: usize, - next: usize, - data: T, -} - -#[derive(Debug)] -struct FixedSizeList { 
- nodes: Box<[Option>]>, - /// Also corresponds to the first `None` in the nodes. - length: usize, - // TODO Also, we probably do not need one of the front and back cursors. - front: usize, - back: usize, -} - -impl FixedSizeList { - fn new(capacity: usize) -> Self { - Self { - nodes: repeat_with(|| None).take(capacity).collect::>().into_boxed_slice(), - length: 0, - front: usize::MAX, - back: usize::MAX, - } - } - - #[inline] - fn capacity(&self) -> usize { - self.nodes.len() - } - - #[inline] - fn len(&self) -> usize { - self.length - } - - #[inline] - fn is_empty(&self) -> bool { - self.len() == 0 - } - - #[inline] - fn is_full(&self) -> bool { - self.len() == self.capacity() - } - - #[inline] - fn back_idx(&self) -> usize { - self.back - } - - #[inline] - fn next(&mut self) -> Option { - if self.is_full() { - None - } else { - let current_free = self.length; - self.length += 1; - Some(current_free) - } - } - - #[inline] - fn node_mut(&mut self, idx: usize) -> Option<&mut FixedSizeListNode> { - self.nodes.get_mut(idx).and_then(|node| node.as_mut()) - } - - #[inline] - fn node_ref(&self, idx: usize) -> Option<&FixedSizeListNode> { - self.nodes.get(idx).and_then(|node| node.as_ref()) - } - - #[inline] - fn move_front(&mut self, idx: usize) -> Option<&mut T> { - let node = self.nodes.get_mut(idx)?.take()?; - if let Some(prev) = self.node_mut(node.prev) { - prev.next = node.next; - } else { - self.front = node.next; - } - if let Some(next) = self.node_mut(node.next) { - next.prev = node.prev; - } else { - self.back = node.prev; - } - - if let Some(front) = self.node_mut(self.front) { - front.prev = idx; - } - if self.node_ref(self.back).is_none() { - self.back = idx; - } - - let node = self.nodes.get_mut(idx).unwrap().insert(FixedSizeListNode { - prev: usize::MAX, - next: self.front, - data: node.data, - }); - self.front = idx; - Some(&mut node.data) - } - - #[inline] - fn push_front(&mut self, data: T) -> Option<(usize, &mut T)> { - let idx = self.next()?; - if let 
Some(front) = self.node_mut(self.front) { - front.prev = idx; - } - if self.node_ref(self.back).is_none() { - self.back = idx; - } - let node = self.nodes.get_mut(idx).unwrap().insert(FixedSizeListNode { - prev: usize::MAX, - next: self.front, - data, - }); - self.front = idx; - Some((idx, &mut node.data)) - } -} diff --git a/milli/src/update/new/extract/mod.rs b/milli/src/update/new/extract/mod.rs index 1c86d80af..3271c454f 100644 --- a/milli/src/update/new/extract/mod.rs +++ b/milli/src/update/new/extract/mod.rs @@ -1,27 +1,25 @@ mod cache; +mod documents; mod faceted; -mod lru; mod searchable; -use std::cell::RefCell; -use std::fs::File; - use bumpalo::Bump; +pub use cache::{merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap}; +pub use documents::*; pub use faceted::*; -use grenad::Merger; pub use searchable::*; use super::indexer::document_changes::{DocumentChanges, FullySend, IndexingContext, ThreadLocal}; -use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps}; +use crate::update::GrenadParameters; use crate::Result; pub trait DocidsExtractor { - fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>( grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index>, - extractor_allocs: &mut ThreadLocal>>, - ) -> Result>; + extractor_allocs: &'extractor mut ThreadLocal>, + ) -> Result>>; } /// TODO move in permissive json pointer diff --git a/milli/src/update/new/extract/searchable/extract_word_docids.rs b/milli/src/update/new/extract/searchable/extract_word_docids.rs index a5cbd3700..23bca784f 100644 --- a/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -1,113 +1,46 @@ use std::cell::RefCell; use std::collections::HashMap; -use std::fs::File; use std::mem::size_of; -use 
std::num::NonZero; use std::ops::DerefMut as _; use bumpalo::collections::vec::Vec as BumpVec; use bumpalo::Bump; -use grenad::{Merger, MergerBuilder}; use heed::RoTxn; use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; -use crate::update::new::extract::cache::CboCachedSorter; +use crate::update::new::extract::cache::BalancedCaches; use crate::update::new::extract::perm_json_p::contained_in; use crate::update::new::indexer::document_changes::{ for_each_document_change, DocumentChangeContext, DocumentChanges, Extractor, FullySend, - IndexingContext, RefCellExt, ThreadLocal, + IndexingContext, MostlySend, RefCellExt, ThreadLocal, }; use crate::update::new::DocumentChange; -use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; +use crate::update::GrenadParameters; use crate::{bucketed_position, DocumentId, FieldId, Index, Result, MAX_POSITION_PER_ATTRIBUTE}; const MAX_COUNTED_WORDS: usize = 30; -pub struct WordDocidsCachedSorters { - word_fid_docids: CboCachedSorter, - word_docids: CboCachedSorter, - exact_word_docids: CboCachedSorter, - word_position_docids: CboCachedSorter, - fid_word_count_docids: CboCachedSorter, +pub struct WordDocidsBalancedCaches<'extractor> { + word_fid_docids: BalancedCaches<'extractor>, + word_docids: BalancedCaches<'extractor>, + exact_word_docids: BalancedCaches<'extractor>, + word_position_docids: BalancedCaches<'extractor>, + fid_word_count_docids: BalancedCaches<'extractor>, fid_word_count: HashMap, current_docid: Option, } -impl WordDocidsCachedSorters { - pub fn new( - indexer: GrenadParameters, - max_memory: Option, - capacity: NonZero, - ) -> Self { - let max_memory = max_memory.map(|max_memory| max_memory / 4); - - let word_fid_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); - let 
word_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); - let exact_word_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); - let word_position_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); - let fid_word_count_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); +unsafe impl<'extractor> MostlySend for WordDocidsBalancedCaches<'extractor> {} +impl<'extractor> WordDocidsBalancedCaches<'extractor> { + /// TODO Make sure to give the same max_memory to all of them, without splitting it + pub fn new_in(buckets: usize, max_memory: Option, alloc: &'extractor Bump) -> Self { Self { - word_fid_docids, - word_docids, - exact_word_docids, - word_position_docids, - fid_word_count_docids, + word_fid_docids: BalancedCaches::new_in(buckets, max_memory, alloc), + word_docids: BalancedCaches::new_in(buckets, max_memory, alloc), + exact_word_docids: BalancedCaches::new_in(buckets, max_memory, alloc), + word_position_docids: BalancedCaches::new_in(buckets, max_memory, alloc), + fid_word_count_docids: BalancedCaches::new_in(buckets, max_memory, alloc), fid_word_count: HashMap::new(), current_docid: None, } @@ -198,6 +131,7 @@ impl WordDocidsCachedSorters { .entry(field_id) 
.and_modify(|(current_count, _new_count)| *current_count += 1) .or_insert((1, 0)); + self.current_docid = Some(docid); Ok(()) @@ -227,37 +161,29 @@ impl WordDocidsCachedSorters { } } -struct WordDocidsMergerBuilders { - word_fid_docids: MergerBuilder, - word_docids: MergerBuilder, - exact_word_docids: MergerBuilder, - word_position_docids: MergerBuilder, - fid_word_count_docids: MergerBuilder, +pub struct WordDocidsCaches<'extractor> { + pub word_docids: Vec>, + pub word_fid_docids: Vec>, + pub exact_word_docids: Vec>, + pub word_position_docids: Vec>, + pub fid_word_count_docids: Vec>, } -pub struct WordDocidsMergers { - pub word_fid_docids: Merger, - pub word_docids: Merger, - pub exact_word_docids: Merger, - pub word_position_docids: Merger, - pub fid_word_count_docids: Merger, -} - -impl WordDocidsMergerBuilders { +impl<'extractor> WordDocidsCaches<'extractor> { fn new() -> Self { Self { - word_fid_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), - word_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), - exact_word_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), - word_position_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), - fid_word_count_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), + word_docids: Vec::new(), + word_fid_docids: Vec::new(), + exact_word_docids: Vec::new(), + word_position_docids: Vec::new(), + fid_word_count_docids: Vec::new(), } } - fn add_sorters(&mut self, other: WordDocidsCachedSorters) -> Result<()> { - let WordDocidsCachedSorters { - word_fid_docids, + fn push(&mut self, other: WordDocidsBalancedCaches<'extractor>) -> Result<()> { + let WordDocidsBalancedCaches { word_docids, + word_fid_docids, exact_word_docids, word_position_docids, fid_word_count_docids, @@ -265,78 +191,37 @@ impl WordDocidsMergerBuilders { current_docid: _, } = other; - let mut word_fid_docids_readers = Ok(vec![]); - let mut word_docids_readers = Ok(vec![]); - let mut exact_word_docids_readers = Ok(vec![]); - let mut 
word_position_docids_readers = Ok(vec![]); - let mut fid_word_count_docids_readers = Ok(vec![]); - rayon::scope(|s| { - s.spawn(|_| { - word_fid_docids_readers = - word_fid_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - s.spawn(|_| { - word_docids_readers = - word_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - s.spawn(|_| { - exact_word_docids_readers = - exact_word_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - s.spawn(|_| { - word_position_docids_readers = - word_position_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - s.spawn(|_| { - fid_word_count_docids_readers = - fid_word_count_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - }); - self.word_fid_docids.extend(word_fid_docids_readers?); - self.word_docids.extend(word_docids_readers?); - self.exact_word_docids.extend(exact_word_docids_readers?); - self.word_position_docids.extend(word_position_docids_readers?); - self.fid_word_count_docids.extend(fid_word_count_docids_readers?); + self.word_docids.push(word_docids); + self.word_fid_docids.push(word_fid_docids); + self.exact_word_docids.push(exact_word_docids); + self.word_position_docids.push(word_position_docids); + self.fid_word_count_docids.push(fid_word_count_docids); Ok(()) } - - fn build(self) -> WordDocidsMergers { - WordDocidsMergers { - word_fid_docids: self.word_fid_docids.build(), - word_docids: self.word_docids.build(), - exact_word_docids: self.exact_word_docids.build(), - word_position_docids: self.word_position_docids.build(), - fid_word_count_docids: self.fid_word_count_docids.build(), - } - } } -pub struct WordDocidsExtractorData<'extractor> { - tokenizer: &'extractor DocumentTokenizer<'extractor>, +pub struct WordDocidsExtractorData<'a> { + tokenizer: &'a DocumentTokenizer<'a>, grenad_parameters: GrenadParameters, - max_memory: Option, + buckets: usize, } -impl<'extractor> Extractor<'extractor> for WordDocidsExtractorData<'extractor> { - type 
Data = FullySend>; +impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> { + type Data = RefCell>>; - fn init_data( - &self, - _extractor_alloc: raw_collections::alloc::RefBump<'extractor>, - ) -> Result { - Ok(FullySend(RefCell::new(WordDocidsCachedSorters::new( - self.grenad_parameters, - self.max_memory, - // TODO use a better value - 200_000.try_into().unwrap(), + fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { + Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in( + self.buckets, + self.grenad_parameters.max_memory, + extractor_alloc, )))) } fn process( &self, change: DocumentChange, - context: &crate::update::new::indexer::document_changes::DocumentChangeContext, + context: &DocumentChangeContext, ) -> Result<()> { WordDocidsExtractors::extract_document_change(context, self.tokenizer, change) } @@ -345,16 +230,15 @@ impl<'extractor> Extractor<'extractor> for WordDocidsExtractorData<'extractor> { pub struct WordDocidsExtractors; impl WordDocidsExtractors { - pub fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>>( + pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>( grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index>, - extractor_allocs: &mut ThreadLocal>>, - ) -> Result { - let max_memory = grenad_parameters.max_memory_by_thread(); + extractor_allocs: &'extractor mut ThreadLocal>, + ) -> Result> { let index = indexing_context.index; - let rtxn = index.read_txn()?; + let stop_words = index.stop_words(&rtxn)?; let allowed_separators = index.allowed_separators(&rtxn)?; let allowed_separators: Option> = @@ -392,7 +276,7 @@ impl WordDocidsExtractors { let extractor = WordDocidsExtractorData { tokenizer: &document_tokenizer, grenad_parameters, - max_memory, + buckets: rayon::current_num_threads(), }; for_each_document_change( @@ -404,28 +288,23 @@ impl WordDocidsExtractors { )?; } - { - let span = - 
tracing::trace_span!(target: "indexing::documents::extract", "merger_building"); - let _entered = span.enter(); - let mut builder = WordDocidsMergerBuilders::new(); - for cache in datastore.into_iter().map(|cache| cache.0.into_inner()) { - builder.add_sorters(cache)?; - } - - Ok(builder.build()) + let mut merger = WordDocidsCaches::new(); + for cache in datastore.into_iter().flat_map(RefCell::into_inner) { + merger.push(cache)?; } + + Ok(merger) } fn extract_document_change( - context: &DocumentChangeContext>>, + context: &DocumentChangeContext>>, document_tokenizer: &DocumentTokenizer, document_change: DocumentChange, ) -> Result<()> { let index = &context.index; let rtxn = &context.txn; - let mut cached_sorter = context.data.0.borrow_mut_or_yield(); - let cached_sorter = cached_sorter.deref_mut(); + let mut cached_sorter_ref = context.data.borrow_mut_or_yield(); + let cached_sorter = cached_sorter_ref.as_mut().unwrap(); let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); let new_fields_ids_map = new_fields_ids_map.deref_mut(); let doc_alloc = &context.doc_alloc; @@ -436,16 +315,14 @@ impl WordDocidsExtractors { match document_change { DocumentChange::Deletion(inner) => { let mut token_fn = |fname: &str, fid, pos, word: &str| { - cached_sorter - .insert_del_u32( - fid, - pos, - word, - is_exact_attribute(fname), - inner.docid(), - doc_alloc, - ) - .map_err(crate::Error::from) + cached_sorter.insert_del_u32( + fid, + pos, + word, + is_exact_attribute(fname), + inner.docid(), + doc_alloc, + ) }; document_tokenizer.tokenize_document( inner.current(rtxn, index, context.db_fields_ids_map)?, @@ -455,16 +332,14 @@ impl WordDocidsExtractors { } DocumentChange::Update(inner) => { let mut token_fn = |fname: &str, fid, pos, word: &str| { - cached_sorter - .insert_del_u32( - fid, - pos, - word, - is_exact_attribute(fname), - inner.docid(), - doc_alloc, - ) - .map_err(crate::Error::from) + cached_sorter.insert_del_u32( + fid, + pos, + word, + 
is_exact_attribute(fname), + inner.docid(), + doc_alloc, + ) }; document_tokenizer.tokenize_document( inner.current(rtxn, index, context.db_fields_ids_map)?, @@ -473,16 +348,14 @@ impl WordDocidsExtractors { )?; let mut token_fn = |fname: &str, fid, pos, word: &str| { - cached_sorter - .insert_add_u32( - fid, - pos, - word, - is_exact_attribute(fname), - inner.docid(), - doc_alloc, - ) - .map_err(crate::Error::from) + cached_sorter.insert_add_u32( + fid, + pos, + word, + is_exact_attribute(fname), + inner.docid(), + doc_alloc, + ) }; document_tokenizer.tokenize_document( inner.new(rtxn, index, context.db_fields_ids_map)?, @@ -492,16 +365,14 @@ impl WordDocidsExtractors { } DocumentChange::Insertion(inner) => { let mut token_fn = |fname: &str, fid, pos, word: &str| { - cached_sorter - .insert_add_u32( - fid, - pos, - word, - is_exact_attribute(fname), - inner.docid(), - doc_alloc, - ) - .map_err(crate::Error::from) + cached_sorter.insert_add_u32( + fid, + pos, + word, + is_exact_attribute(fname), + inner.docid(), + doc_alloc, + ) }; document_tokenizer.tokenize_document( inner.new(), diff --git a/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs b/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs index 53e6515a9..6f354688c 100644 --- a/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs @@ -8,13 +8,13 @@ use super::tokenize_document::DocumentTokenizer; use super::SearchableExtractor; use crate::proximity::{index_proximity, MAX_DISTANCE}; use crate::update::new::document::Document; -use crate::update::new::extract::cache::CboCachedSorter; -use crate::update::new::indexer::document_changes::{DocumentChangeContext, FullySend, RefCellExt}; +use crate::update::new::extract::cache::BalancedCaches; +use crate::update::new::indexer::document_changes::{DocumentChangeContext, RefCellExt}; use 
crate::update::new::DocumentChange; -use crate::update::MergeDeladdCboRoaringBitmaps; use crate::{FieldId, GlobalFieldsIdsMap, Index, Result}; pub struct WordPairProximityDocidsExtractor; + impl SearchableExtractor for WordPairProximityDocidsExtractor { fn attributes_to_extract<'a>( rtxn: &'a RoTxn, @@ -28,11 +28,10 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { } // This method is reimplemented to count the number of words in the document in each field - // and to store the docids of the documents that have a number of words in a given field equal to or under than MAX_COUNTED_WORDS. + // and to store the docids of the documents that have a number of words in a given field + // equal to or under than MAX_COUNTED_WORDS. fn extract_document_change( - context: &DocumentChangeContext< - FullySend>>, - >, + context: &DocumentChangeContext>, document_tokenizer: &DocumentTokenizer, document_change: DocumentChange, ) -> Result<()> { @@ -48,7 +47,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); let new_fields_ids_map = &mut *new_fields_ids_map; - let mut cached_sorter = context.data.0.borrow_mut_or_yield(); + let mut cached_sorter = context.data.borrow_mut_or_yield(); let cached_sorter = &mut *cached_sorter; // is a vecdequeue, and will be smol, so can stay on the heap for now @@ -139,7 +138,7 @@ fn build_key<'a>( fn word_positions_into_word_pair_proximity( word_positions: &mut VecDeque<(Rc, u16)>, word_pair_proximity: &mut impl FnMut((Rc, Rc), u8), -) -> Result<()> { +) { let (head_word, head_position) = word_positions.pop_front().unwrap(); for (word, position) in word_positions.iter() { let prox = index_proximity(head_position as u32, *position as u32) as u8; @@ -147,7 +146,6 @@ fn word_positions_into_word_pair_proximity( word_pair_proximity((head_word.clone(), word.clone()), prox); } } - Ok(()) } fn process_document_tokens<'doc>( @@ -163,7 +161,7 @@ fn 
process_document_tokens<'doc>( .front() .map_or(false, |(_w, p)| index_proximity(*p as u32, pos as u32) >= MAX_DISTANCE) { - word_positions_into_word_pair_proximity(word_positions, word_pair_proximity)?; + word_positions_into_word_pair_proximity(word_positions, word_pair_proximity); } // insert the new word. @@ -173,7 +171,7 @@ fn process_document_tokens<'doc>( document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?; while !word_positions.is_empty() { - word_positions_into_word_pair_proximity(word_positions, word_pair_proximity)?; + word_positions_into_word_pair_proximity(word_positions, word_pair_proximity); } Ok(()) diff --git a/milli/src/update/new/extract/searchable/mod.rs b/milli/src/update/new/extract/searchable/mod.rs index 8934ee892..374718def 100644 --- a/milli/src/update/new/extract/searchable/mod.rs +++ b/milli/src/update/new/extract/searchable/mod.rs @@ -3,76 +3,60 @@ mod extract_word_pair_proximity_docids; mod tokenize_document; use std::cell::RefCell; -use std::fs::File; use std::marker::PhantomData; use bumpalo::Bump; -pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers}; +pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors}; pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor; -use grenad::Merger; use heed::RoTxn; -use rayon::iter::{ParallelBridge, ParallelIterator}; use tokenize_document::{tokenizer_builder, DocumentTokenizer}; -use super::cache::CboCachedSorter; +use super::cache::BalancedCaches; use super::DocidsExtractor; use crate::update::new::indexer::document_changes::{ for_each_document_change, DocumentChangeContext, DocumentChanges, Extractor, FullySend, IndexingContext, ThreadLocal, }; use crate::update::new::DocumentChange; -use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; +use crate::update::GrenadParameters; use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE}; -pub struct SearchableExtractorData<'extractor, EX: 
SearchableExtractor> { - tokenizer: &'extractor DocumentTokenizer<'extractor>, +pub struct SearchableExtractorData<'a, EX: SearchableExtractor> { + tokenizer: &'a DocumentTokenizer<'a>, grenad_parameters: GrenadParameters, - max_memory: Option, + buckets: usize, _ex: PhantomData, } -impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> - for SearchableExtractorData<'extractor, EX> +impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> + for SearchableExtractorData<'a, EX> { - type Data = FullySend>>; + type Data = RefCell>; - fn init_data( - &self, - _extractor_alloc: raw_collections::alloc::RefBump<'extractor>, - ) -> Result { - Ok(FullySend(RefCell::new(CboCachedSorter::new( - // TODO use a better value - 1_000_000.try_into().unwrap(), - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - self.grenad_parameters.chunk_compression_type, - self.grenad_parameters.chunk_compression_level, - self.grenad_parameters.max_nb_chunks, - self.max_memory, - false, - ), - )))) + fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { + Ok(RefCell::new(BalancedCaches::new_in( + self.buckets, + self.grenad_parameters.max_memory, + extractor_alloc, + ))) } fn process( &self, change: DocumentChange, - context: &crate::update::new::indexer::document_changes::DocumentChangeContext, + context: &DocumentChangeContext, ) -> Result<()> { EX::extract_document_change(context, self.tokenizer, change) } } pub trait SearchableExtractor: Sized + Sync { - fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>( grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index>, - extractor_allocs: &mut ThreadLocal>>, - ) -> Result> { - let max_memory = grenad_parameters.max_memory_by_thread(); - + extractor_allocs: &'extractor mut ThreadLocal>, + ) -> Result>> { 
let rtxn = indexing_context.index.read_txn()?; let stop_words = indexing_context.index.stop_words(&rtxn)?; let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?; @@ -104,7 +88,7 @@ pub trait SearchableExtractor: Sized + Sync { let extractor_data: SearchableExtractorData = SearchableExtractorData { tokenizer: &document_tokenizer, grenad_parameters, - max_memory, + buckets: rayon::current_num_threads(), _ex: PhantomData, }; @@ -122,37 +106,12 @@ pub trait SearchableExtractor: Sized + Sync { &datastore, )?; } - { - let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps); - let span = - tracing::trace_span!(target: "indexing::documents::extract", "merger_building"); - let _entered = span.enter(); - let readers: Vec<_> = datastore - .into_iter() - .par_bridge() - .map(|cache_entry| { - let cached_sorter: FullySend< - RefCell>, - > = cache_entry; - let cached_sorter = cached_sorter.0.into_inner(); - let sorter = cached_sorter.into_sorter()?; - sorter.into_reader_cursors() - }) - .collect(); - - for reader in readers { - builder.extend(reader?); - } - - Ok(builder.build()) - } + Ok(datastore.into_iter().map(RefCell::into_inner).collect()) } fn extract_document_change( - context: &DocumentChangeContext< - FullySend>>, - >, + context: &DocumentChangeContext>, document_tokenizer: &DocumentTokenizer, document_change: DocumentChange, ) -> Result<()>; @@ -164,12 +123,12 @@ pub trait SearchableExtractor: Sized + Sync { } impl DocidsExtractor for T { - fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>( grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index>, - extractor_allocs: &mut ThreadLocal>>, - ) -> Result> { + extractor_allocs: &'extractor mut ThreadLocal>, + ) -> Result>> { Self::run_extraction( grenad_parameters, document_changes, diff --git 
a/milli/src/update/new/extract/searchable/tokenize_document.rs b/milli/src/update/new/extract/searchable/tokenize_document.rs index b8fd24f1b..7c4ada467 100644 --- a/milli/src/update/new/extract/searchable/tokenize_document.rs +++ b/milli/src/update/new/extract/searchable/tokenize_document.rs @@ -171,7 +171,6 @@ mod test { use bumpalo::Bump; use charabia::TokenizerBuilder; use meili_snap::snapshot; - use raw_collections::RawMap; use serde_json::json; use serde_json::value::RawValue; diff --git a/milli/src/update/new/facet_search_builder.rs b/milli/src/update/new/facet_search_builder.rs index 4602b5a30..b9db80afb 100644 --- a/milli/src/update/new/facet_search_builder.rs +++ b/milli/src/update/new/facet_search_builder.rs @@ -1,31 +1,24 @@ use std::collections::{BTreeSet, HashMap}; -use charabia::{normalizer::NormalizerOption, Language, Normalize, StrDetection, Token}; +use charabia::normalizer::NormalizerOption; +use charabia::{Language, Normalize, StrDetection, Token}; use grenad::Sorter; -use heed::{ - types::{Bytes, SerdeJson}, - BytesDecode, BytesEncode, RoTxn, -}; +use heed::types::{Bytes, SerdeJson}; +use heed::{BytesDecode, BytesEncode, RoTxn, RwTxn}; +use super::channel::FacetSearchableSender; +use super::extract::FacetKind; +use super::fst_merger_builder::FstMergerBuilder; +use super::KvReaderDelAdd; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; +use crate::heed_codec::StrRefCodec; +use crate::update::del_add::{DelAdd, KvWriterDelAdd}; +use crate::update::{create_sorter, MergeDeladdBtreesetString}; use crate::{ - heed_codec::{ - facet::{FacetGroupKey, FacetGroupKeyCodec}, - StrRefCodec, - }, - update::{ - create_sorter, - del_add::{DelAdd, KvWriterDelAdd}, - MergeDeladdBtreesetString, - }, BEU16StrCodec, FieldId, GlobalFieldsIdsMap, Index, LocalizedAttributesRule, Result, MAX_FACET_VALUE_LENGTH, }; -use super::{ - channel::FacetSearchableSender, extract::FacetKind, fst_merger_builder::FstMergerBuilder, - KvReaderDelAdd, -}; - pub struct 
FacetSearchBuilder<'indexer> { registered_facets: HashMap, normalized_facet_string_docids_sorter: Sorter, @@ -49,6 +42,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { None, None, Some(0), + false, ); Self { @@ -84,7 +78,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { } let locales = self.locales(field_id); - let hyper_normalized_value = normalize_facet_string(left_bound, locales.as_deref()); + let hyper_normalized_value = normalize_facet_string(left_bound, locales); let set = BTreeSet::from_iter(std::iter::once(left_bound)); @@ -103,7 +97,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { } fn locales(&mut self, field_id: FieldId) -> Option<&[Language]> { - if self.localized_field_ids.get(&field_id).is_none() { + if !self.localized_field_ids.contains_key(&field_id) { let Some(field_name) = self.global_fields_ids_map.name(field_id) else { unreachable!("Field id {} not found in the global fields ids map", field_id); }; @@ -124,7 +118,8 @@ impl<'indexer> FacetSearchBuilder<'indexer> { pub fn merge_and_send( self, index: &Index, - rtxn: &RoTxn<'_>, + wtxn: &mut RwTxn, + rtxn: &RoTxn, sender: FacetSearchableSender, ) -> Result<()> { let reader = self.normalized_facet_string_docids_sorter.into_reader_cursors()?; @@ -139,13 +134,14 @@ impl<'indexer> FacetSearchBuilder<'indexer> { let mut fst_merger_builder: Option = None; while let Some((key, deladd)) = merger_iter.next()? 
{ let (field_id, normalized_facet_string) = - BEU16StrCodec::bytes_decode(&key).map_err(heed::Error::Encoding)?; + BEU16StrCodec::bytes_decode(key).map_err(heed::Error::Encoding)?; if current_field_id != Some(field_id) { if let Some(fst_merger_builder) = fst_merger_builder { // send the previous fst to the channel let mmap = fst_merger_builder.build(&mut callback)?; - sender.write_fst(&field_id.to_be_bytes(), mmap).unwrap(); + // sender.write_fst(&field_id.to_be_bytes(), mmap).unwrap(); + todo!("What to do"); } println!("getting fst for field_id: {}", field_id); @@ -198,7 +194,8 @@ impl<'indexer> FacetSearchBuilder<'indexer> { if let (Some(field_id), Some(fst_merger_builder)) = (current_field_id, fst_merger_builder) { let mmap = fst_merger_builder.build(&mut callback)?; - sender.write_fst(&field_id.to_be_bytes(), mmap).unwrap(); + // sender.write_fst(&field_id.to_be_bytes(), mmap).unwrap(); + todo!("What to do"); } Ok(()) @@ -209,7 +206,7 @@ fn callback(_bytes: &[u8], _deladd: DelAdd, _is_modified: bool) -> Result<()> { Ok(()) } -fn merge_btreesets<'a>( +fn merge_btreesets( current: Option<&[u8]>, del: Option<&[u8]>, add: Option<&[u8]>, diff --git a/milli/src/update/new/indexer/de.rs b/milli/src/update/new/indexer/de.rs index fa6b5fa76..3da4fc239 100644 --- a/milli/src/update/new/indexer/de.rs +++ b/milli/src/update/new/indexer/de.rs @@ -49,7 +49,7 @@ impl<'de, 'p, 'indexer: 'de, Mapper: MutFieldIdMapper> Visitor<'de> visitor: MutFieldIdMapVisitor(self.fields_ids_map), })? 
{ - let Some(fid) = fid else { + let Some(_fid) = fid else { return Ok(Err(crate::UserError::AttributeLimitReached)); }; self.fields_ids_map = fields_ids_map; diff --git a/milli/src/update/new/indexer/document_changes.rs b/milli/src/update/new/indexer/document_changes.rs index 423ddbdcc..a6bef9330 100644 --- a/milli/src/update/new/indexer/document_changes.rs +++ b/milli/src/update/new/indexer/document_changes.rs @@ -3,7 +3,6 @@ use std::sync::{Arc, RwLock}; use bumpalo::Bump; use heed::RoTxn; -use raw_collections::alloc::RefBump; use rayon::iter::IndexedParallelIterator; use super::super::document_change::DocumentChange; @@ -104,6 +103,10 @@ pub struct FullySend(pub T); // SAFETY: a type **fully** send is always mostly send as well. unsafe impl MostlySend for FullySend where T: Send {} +unsafe impl MostlySend for RefCell where T: MostlySend {} + +unsafe impl MostlySend for Option where T: MostlySend {} + impl FullySend { pub fn into(self) -> T { self.0 @@ -256,7 +259,7 @@ pub struct DocumentChangeContext< pub doc_alloc: Bump, /// Data allocated in this allocator is not cleared between each call to `process`, unless the data spills. 
- pub extractor_alloc: RefBump<'extractor>, + pub extractor_alloc: &'extractor Bump, /// Pool of doc allocators, used to retrieve the doc allocator we provided for the documents doc_allocs: &'doc ThreadLocal>>, @@ -279,14 +282,14 @@ impl< index: &'indexer Index, db_fields_ids_map: &'indexer FieldsIdsMap, new_fields_ids_map: &'fid RwLock, - extractor_allocs: &'extractor ThreadLocal>>, + extractor_allocs: &'extractor ThreadLocal>, doc_allocs: &'doc ThreadLocal>>, datastore: &'data ThreadLocal, fields_ids_map_store: &'doc ThreadLocal>>>, init_data: F, ) -> Result where - F: FnOnce(RefBump<'extractor>) -> Result, + F: FnOnce(&'extractor Bump) -> Result, { let doc_alloc = doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024 * 1024)))); @@ -297,9 +300,7 @@ impl< let fields_ids_map = &fields_ids_map.0; let extractor_alloc = extractor_allocs.get_or_default(); - let extractor_alloc = RefBump::new(extractor_alloc.0.borrow_or_yield()); - - let data = datastore.get_or_try(|| init_data(RefBump::clone(&extractor_alloc)))?; + let data = datastore.get_or_try(move || init_data(&extractor_alloc.0))?; let txn = index.read_txn()?; Ok(DocumentChangeContext { @@ -308,7 +309,7 @@ impl< db_fields_ids_map, new_fields_ids_map: fields_ids_map, doc_alloc, - extractor_alloc, + extractor_alloc: &extractor_alloc.0, data, doc_allocs, }) @@ -319,7 +320,7 @@ impl< pub trait Extractor<'extractor>: Sync { type Data: MostlySend; - fn init_data<'doc>(&'doc self, extractor_alloc: RefBump<'extractor>) -> Result; + fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> Result; fn process<'doc>( &'doc self, @@ -375,15 +376,17 @@ pub fn for_each_document_change< doc_allocs, fields_ids_map_store, }: IndexingContext<'fid, 'indexer, 'index>, - extractor_allocs: &'extractor mut ThreadLocal>>, + extractor_allocs: &'extractor mut ThreadLocal>, datastore: &'data ThreadLocal, ) -> Result<()> where EX: Extractor<'extractor>, { + eprintln!("We are resetting the extractor 
allocators"); // Clean up and reuse the extractor allocs for extractor_alloc in extractor_allocs.iter_mut() { - extractor_alloc.0.get_mut().reset(); + eprintln!("\tWith {} bytes resetted", extractor_alloc.0.allocated_bytes()); + extractor_alloc.0.reset(); } let pi = document_changes.iter(); diff --git a/milli/src/update/new/indexer/document_deletion.rs b/milli/src/update/new/indexer/document_deletion.rs index a9628f419..c62f5c28f 100644 --- a/milli/src/update/new/indexer/document_deletion.rs +++ b/milli/src/update/new/indexer/document_deletion.rs @@ -80,7 +80,6 @@ mod test { use std::sync::RwLock; use bumpalo::Bump; - use raw_collections::alloc::RefBump; use crate::index::tests::TempIndex; use crate::update::new::indexer::document_changes::{ @@ -95,11 +94,7 @@ mod test { fn test_deletions() { struct DeletionWithData<'extractor> { deleted: RefCell< - hashbrown::HashSet< - DocumentId, - hashbrown::hash_map::DefaultHashBuilder, - RefBump<'extractor>, - >, + hashbrown::HashSet, >, } @@ -110,10 +105,7 @@ mod test { impl<'extractor> Extractor<'extractor> for TrackDeletion<'extractor> { type Data = DeletionWithData<'extractor>; - fn init_data( - &self, - extractor_alloc: raw_collections::alloc::RefBump<'extractor>, - ) -> crate::Result { + fn init_data(&self, extractor_alloc: &'extractor Bump) -> crate::Result { let deleted = RefCell::new(hashbrown::HashSet::new_in(extractor_alloc)); Ok(DeletionWithData { deleted }) } @@ -173,8 +165,7 @@ mod test { println!("deleted by {index}: {:?}", data.deleted.borrow()); } for alloc in extractor_allocs.iter_mut() { - let alloc = &mut alloc.0; - alloc.get_mut().reset(); + alloc.0.reset(); } } } diff --git a/milli/src/update/new/indexer/mod.rs b/milli/src/update/new/indexer/mod.rs index 29ff2685e..1122d3ac9 100644 --- a/milli/src/update/new/indexer/mod.rs +++ b/milli/src/update/new/indexer/mod.rs @@ -1,26 +1,24 @@ -use std::cell::RefCell; +use std::cmp::Ordering; use std::sync::RwLock; use std::thread::{self, Builder}; use big_s::S; 
-use bumpalo::Bump; use document_changes::{ - for_each_document_change, DocumentChanges, Extractor, FullySend, IndexingContext, RefCellExt, - ThreadLocal, + for_each_document_change, DocumentChanges, FullySend, IndexingContext, ThreadLocal, }; pub use document_deletion::DocumentDeletion; pub use document_operation::DocumentOperation; +use heed::types::{Bytes, DecodeIgnore, Str}; use heed::{RoTxn, RwTxn}; +use itertools::{merge_join_by, EitherOrBoth}; pub use partial_dump::PartialDump; use rayon::ThreadPool; use time::OffsetDateTime; pub use update_by_function::UpdateByFunction; use super::channel::*; -use super::document::write_to_obkv; -use super::document_change::DocumentChange; use super::extract::*; -use super::merger::{merge_grenad_entries, FacetFieldIdsDelta}; +use super::merger::{FacetDatabases, FacetFieldIdsDelta}; use super::word_fst_builder::PrefixDelta; use super::words_prefix_docids::{ compute_word_prefix_docids, compute_word_prefix_fid_docids, compute_word_prefix_position_docids, @@ -28,75 +26,23 @@ use super::words_prefix_docids::{ use super::{StdResult, TopLevelMap}; use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; use crate::facet::FacetType; +use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; use crate::proximity::ProximityPrecision; -use crate::update::new::channel::ExtractorSender; +use crate::update::del_add::DelAdd; +use crate::update::new::word_fst_builder::{PrefixData, WordFstBuilder}; use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids; +use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids}; use crate::update::settings::InnerIndexSettings; use crate::update::{FacetsUpdateBulk, GrenadParameters}; -use crate::{Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError}; +use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError}; -pub(crate) mod de; +pub mod de; pub mod document_changes; mod document_deletion; mod document_operation; mod 
partial_dump; mod update_by_function; -struct DocumentExtractor<'a> { - document_sender: &'a DocumentSender<'a>, -} - -impl<'a, 'extractor> Extractor<'extractor> for DocumentExtractor<'a> { - type Data = FullySend<()>; - - fn init_data( - &self, - _extractor_alloc: raw_collections::alloc::RefBump<'extractor>, - ) -> Result { - Ok(FullySend(())) - } - - fn process( - &self, - change: DocumentChange, - context: &document_changes::DocumentChangeContext, - ) -> Result<()> { - let mut document_buffer = Vec::new(); - - let new_fields_ids_map = context.new_fields_ids_map.borrow_or_yield(); - let new_fields_ids_map = &*new_fields_ids_map; - let new_fields_ids_map = new_fields_ids_map.local_map(); - - let external_docid = change.external_docid().to_owned(); - - // document but we need to create a function that collects and compresses documents. - match change { - DocumentChange::Deletion(deletion) => { - let docid = deletion.docid(); - self.document_sender.delete(docid, external_docid).unwrap(); - } - /// TODO: change NONE by SOME(vector) when implemented - DocumentChange::Update(update) => { - let docid = update.docid(); - let content = - update.new(&context.txn, context.index, &context.db_fields_ids_map)?; - let content = - write_to_obkv(&content, None, new_fields_ids_map, &mut document_buffer)?; - self.document_sender.insert(docid, external_docid, content.boxed()).unwrap(); - } - DocumentChange::Insertion(insertion) => { - let docid = insertion.docid(); - let content = insertion.new(); - let content = - write_to_obkv(&content, None, new_fields_ids_map, &mut document_buffer)?; - self.document_sender.insert(docid, external_docid, content.boxed()).unwrap(); - // extracted_dictionary_sender.send(self, dictionary: &[u8]); - } - } - Ok(()) - } -} - /// This is the main function of this crate. /// /// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`]. 
@@ -114,12 +60,11 @@ pub fn index<'pl, 'indexer, 'index, DC>( where DC: DocumentChanges<'pl>, { - let (merger_sender, writer_receiver) = merger_writer_channel(10_000); - // This channel acts as a rendezvous point to ensure that we are one task ahead - let (extractor_sender, merger_receiver) = extractors_merger_channels(4); - + // TODO find a better channel limit + let (extractor_sender, writer_receiver) = extractor_writer_channel(10_000); let new_fields_ids_map = RwLock::new(new_fields_ids_map); + let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); let fields_ids_map_store = ThreadLocal::with_capacity(pool.current_num_threads()); let mut extractor_allocs = ThreadLocal::with_capacity(pool.current_num_threads()); let doc_allocs = ThreadLocal::with_capacity(pool.current_num_threads()); @@ -132,124 +77,171 @@ where fields_ids_map_store: &fields_ids_map_store, }; - thread::scope(|s| { + thread::scope(|s| -> crate::Result<_> { let indexer_span = tracing::Span::current(); // TODO manage the errors correctly - let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { + let extractor_handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { pool.in_place_scope(|_s| { - let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract"); + let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract"); + let _entered = span.enter(); + + // document but we need to create a function that collects and compresses documents. 
+ let rtxn = index.read_txn().unwrap(); + let document_sender = extractor_sender.documents(); + let document_extractor = DocumentsExtractor::new(&document_sender); + let datastore = ThreadLocal::with_capacity(pool.current_num_threads()); + for_each_document_change(document_changes, &document_extractor, indexing_context, &mut extractor_allocs, &datastore)?; + + let mut documents_ids = index.documents_ids(&rtxn)?; + let delta_documents_ids = datastore.into_iter().map(|FullySend(d)| d.into_inner()).reduce(DelAddRoaringBitmap::merge).unwrap_or_default(); + delta_documents_ids.apply_to(&mut documents_ids); + extractor_sender.send_documents_ids(documents_ids).unwrap(); + + // document_sender.finish().unwrap(); + + const TEN_GIB: usize = 10 * 1024 * 1024 * 1024; + let current_num_threads = rayon::current_num_threads(); + let max_memory = TEN_GIB / current_num_threads; + eprintln!("A maximum of {max_memory} bytes will be used for each of the {current_num_threads} threads"); + let grenad_parameters = GrenadParameters { + max_memory: Some(max_memory), + ..GrenadParameters::default() + }; + + let facet_field_ids_delta; + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted"); + let _entered = span.enter(); + facet_field_ids_delta = merge_and_send_facet_docids( + global_fields_ids_map, + FacetedDocidsExtractor::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs)?, + FacetDatabases::new(index), + index, + extractor_sender.facet_docids(), + )?; + } + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); let _entered = span.enter(); - // document but we need to create a function that collects and compresses documents. 
- let document_sender = extractor_sender.document_sender(); - let document_extractor = DocumentExtractor { document_sender: &document_sender}; - let datastore = ThreadLocal::with_capacity(pool.current_num_threads()); - for_each_document_change(document_changes, &document_extractor, indexing_context, &mut extractor_allocs, &datastore)?; - - document_sender.finish().unwrap(); - - const TEN_GIB: usize = 10 * 1024 * 1024 * 1024; - let max_memory = TEN_GIB / dbg!(rayon::current_num_threads()); - let grenad_parameters = GrenadParameters { - max_memory: Some(max_memory), - ..GrenadParameters::default() - }; + let WordDocidsCaches { + word_docids, + word_fid_docids, + exact_word_docids, + word_position_docids, + fid_word_count_docids, + } = WordDocidsExtractors::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs)?; + // TODO Word Docids Merger + // extractor_sender.send_searchable::(word_docids).unwrap(); { - let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted"); + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); let _entered = span.enter(); - extract_and_send_docids::< - _, - FacetedDocidsExtractor, - FacetDocids, - >( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - &extractor_sender, - )?; - } - - { - let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); - let _entered = span.enter(); - - let WordDocidsMergers { - word_fid_docids, + merge_and_send_docids( word_docids, - exact_word_docids, - word_position_docids, - fid_word_count_docids, - } = WordDocidsExtractors::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs)?; - extractor_sender.send_searchable::(word_docids).unwrap(); - extractor_sender.send_searchable::(word_fid_docids).unwrap(); - extractor_sender.send_searchable::(exact_word_docids).unwrap(); - 
extractor_sender.send_searchable::(word_position_docids).unwrap(); - extractor_sender.send_searchable::(fid_word_count_docids).unwrap(); - } - - // run the proximity extraction only if the precision is by word - // this works only if the settings didn't change during this transaction. - let rtxn = index.read_txn().unwrap(); - let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default(); - if proximity_precision == ProximityPrecision::ByWord { - let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); - let _entered = span.enter(); - extract_and_send_docids::< - _, - WordPairProximityDocidsExtractor, - WordPairProximityDocids, - >( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - &extractor_sender, + index.word_docids.remap_types(), + index, + extractor_sender.docids::(), )?; } + // Word Fid Docids Merging + // extractor_sender.send_searchable::(word_fid_docids).unwrap(); { - let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH"); + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); let _entered = span.enter(); + merge_and_send_docids( + word_fid_docids, + index.word_fid_docids.remap_types(), + index, + extractor_sender.docids::() + )?; } - // TODO THIS IS TOO MUCH - // - [ ] Extract fieldid docid facet number - // - [ ] Extract fieldid docid facet string - // - [ ] Extract facetid string fst - // - [ ] Extract facetid normalized string strings + // Exact Word Docids Merging + // extractor_sender.send_searchable::(exact_word_docids).unwrap(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); + let _entered = span.enter(); + merge_and_send_docids( + exact_word_docids, + index.exact_word_docids.remap_types(), + index, + extractor_sender.docids::(), + )?; + } - // TODO Inverted Indexes again - // - [x] Extract fieldid facet isempty docids - // - [x] 
Extract fieldid facet isnull docids - // - [x] Extract fieldid facet exists docids + // Word Position Docids Merging + // extractor_sender.send_searchable::(word_position_docids).unwrap(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_position_docids, + index.word_position_docids.remap_types(), + index, + extractor_sender.docids::(), + )?; + } - // TODO This is the normal system - // - [x] Extract fieldid facet number docids - // - [x] Extract fieldid facet string docids + // Fid Word Count Docids Merging + // extractor_sender.send_searchable::(fid_word_count_docids).unwrap(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); + let _entered = span.enter(); + merge_and_send_docids( + fid_word_count_docids, + index.field_id_word_count_docids.remap_types(), + index, + extractor_sender.docids::(), + )?; + } + } - Ok(()) as Result<_> - }) + // run the proximity extraction only if the precision is by word + // this works only if the settings didn't change during this transaction. 
+ let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default(); + if proximity_precision == ProximityPrecision::ByWord { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); + let _entered = span.enter(); + let caches = ::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs)?; + merge_and_send_docids( + caches, + index.word_pair_proximity_docids.remap_types(), + index, + extractor_sender.docids::(), + )?; + } + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH"); + let _entered = span.enter(); + } + + // TODO THIS IS TOO MUCH + // - [ ] Extract fieldid docid facet number + // - [ ] Extract fieldid docid facet string + // - [ ] Extract facetid string fst + // - [ ] Extract facetid normalized string strings + + // TODO Inverted Indexes again + // - [x] Extract fieldid facet isempty docids + // - [x] Extract fieldid facet isnull docids + // - [x] Extract fieldid facet exists docids + + // TODO This is the normal system + // - [x] Extract fieldid facet number docids + // - [x] Extract fieldid facet string docids + + // TODO use None when needed + Result::Ok(facet_field_ids_delta) + }) })?; let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); - let indexer_span = tracing::Span::current(); - // TODO manage the errors correctly - let merger_thread = Builder::new().name(S("indexer-merger")).spawn_scoped(s, move || { - let span = - tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "merge"); - let _entered = span.enter(); - let rtxn = index.read_txn().unwrap(); - merge_grenad_entries( - merger_receiver, - merger_sender, - &rtxn, - index, - global_fields_ids_map, - ) - })?; for operation in writer_receiver { let database = operation.database(index); @@ -264,18 +256,66 @@ where } /// TODO handle the panicking threads - handle.join().unwrap()?; - let merger_result = 
merger_thread.join().unwrap()?; + let facet_field_ids_delta = extractor_handle.join().unwrap()?; - if let Some(facet_field_ids_delta) = merger_result.facet_field_ids_delta { - compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; - } + let prefix_delta = { + let rtxn = index.read_txn()?; + let words_fst = index.words_fst(&rtxn)?; + let mut word_fst_builder = WordFstBuilder::new(&words_fst)?; + let prefix_settings = index.prefix_settings(&rtxn)?; + word_fst_builder.with_prefix_settings(prefix_settings); - if let Some(prefix_delta) = merger_result.prefix_delta { + let previous_words = index.word_docids.iter(&rtxn)?.remap_data_type::(); + let current_words = index.word_docids.iter(wtxn)?.remap_data_type::(); + for eob in merge_join_by(previous_words, current_words, |lhs, rhs| match (lhs, rhs) { + (Ok((l, _)), Ok((r, _))) => l.cmp(r), + (Err(_), _) | (_, Err(_)) => Ordering::Equal, + }) { + match eob { + EitherOrBoth::Both(lhs, rhs) => { + if let Some(e) = lhs.err().or(rhs.err()) { + return Err(e.into()); + } + } + EitherOrBoth::Left(result) => { + let (word, _) = result?; + word_fst_builder.register_word(DelAdd::Deletion, word.as_ref())?; + } + EitherOrBoth::Right(result) => { + let (word, _) = result?; + word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?; + } + } + } + + let span = tracing::trace_span!(target: "indexing::documents::merge", "words_fst"); + let _entered = span.enter(); + + let (word_fst_mmap, prefix_data) = word_fst_builder.build(index, &rtxn)?; + // extractor_sender.main().write_words_fst(word_fst_mmap).unwrap(); + index.main.remap_types::().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?; + if let Some(PrefixData { prefixes_fst_mmap, prefix_delta }) = prefix_data { + // extractor_sender.main().write_words_prefixes_fst(prefixes_fst_mmap).unwrap(); + index.main.remap_types::().put( + wtxn, + WORDS_PREFIXES_FST_KEY, + &prefixes_fst_mmap, + )?; + Some(prefix_delta) + } else { + None + } + }; + + // if let Some(facet_field_ids_delta) 
= merger_result.facet_field_ids_delta { + // compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; + // } + + if let Some(prefix_delta) = prefix_delta { compute_prefix_database(index, wtxn, prefix_delta)?; } - Ok(()) as Result<_> + Result::Ok(()) })?; // required to into_inner the new_fields_ids_map @@ -347,30 +387,6 @@ fn compute_facet_level_database( Ok(()) } -/// TODO: GrenadParameters::default() should be removed in favor a passed parameter -/// TODO: manage the errors correctly -/// TODO: we must have a single trait that also gives the extractor type -fn extract_and_send_docids< - 'pl, - 'fid, - 'indexer, - 'index, - DC: DocumentChanges<'pl>, - E: DocidsExtractor, - D: MergerOperationType, ->( - grenad_parameters: GrenadParameters, - document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index>, - extractor_allocs: &mut ThreadLocal>>, - sender: &ExtractorSender, -) -> Result<()> { - let merger = - E::run_extraction(grenad_parameters, document_changes, indexing_context, extractor_allocs)?; - sender.send_searchable::(merger).unwrap(); - Ok(()) -} - /// Returns the primary key that has already been set for this index or the /// one we will guess by searching for the first key that contains "id" as a substring, /// and whether the primary key changed diff --git a/milli/src/update/new/indexer/partial_dump.rs b/milli/src/update/new/indexer/partial_dump.rs index 10fc95a03..991a90ab8 100644 --- a/milli/src/update/new/indexer/partial_dump.rs +++ b/milli/src/update/new/indexer/partial_dump.rs @@ -1,12 +1,10 @@ use std::ops::DerefMut; use rayon::iter::IndexedParallelIterator; -use serde::Deserializer; use serde_json::value::RawValue; -use super::de::FieldAndDocidExtractor; use super::document_changes::{DocumentChangeContext, DocumentChanges, MostlySend, RefCellExt}; -use crate::documents::{DocumentIdExtractionError, PrimaryKey}; +use crate::documents::PrimaryKey; use crate::update::concurrent_available_ids::ConcurrentAvailableIds; use 
crate::update::new::document::DocumentFromVersions; use crate::update::new::document_change::Versions; diff --git a/milli/src/update/new/indexer/update_by_function.rs b/milli/src/update/new/indexer/update_by_function.rs index 826f918a4..8b34fec3d 100644 --- a/milli/src/update/new/indexer/update_by_function.rs +++ b/milli/src/update/new/indexer/update_by_function.rs @@ -1,5 +1,3 @@ -use std::collections::BTreeMap; - use raw_collections::RawMap; use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST}; @@ -12,8 +10,8 @@ use crate::documents::PrimaryKey; use crate::error::{FieldIdMapMissingEntry, InternalError}; use crate::update::new::document::DocumentFromVersions; use crate::update::new::document_change::Versions; -use crate::update::new::{Deletion, DocumentChange, KvReaderFieldId, KvWriterFieldId, Update}; -use crate::{all_obkv_to_json, Error, FieldsIdsMap, GlobalFieldsIdsMap, Object, Result, UserError}; +use crate::update::new::{Deletion, DocumentChange, KvReaderFieldId, Update}; +use crate::{all_obkv_to_json, Error, FieldsIdsMap, Object, Result, UserError}; pub struct UpdateByFunction { documents: RoaringBitmap, diff --git a/milli/src/update/new/merger.rs b/milli/src/update/new/merger.rs index 740b215e2..7b3dd85aa 100644 --- a/milli/src/update/new/merger.rs +++ b/milli/src/update/new/merger.rs @@ -1,222 +1,20 @@ -use std::fs::File; use std::io::{self}; use bincode::ErrorKind; -use grenad::Merger; use hashbrown::HashSet; use heed::types::Bytes; use heed::{Database, RoTxn}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; use roaring::RoaringBitmap; use super::channel::*; -use super::extract::FacetKind; -use super::facet_search_builder::FacetSearchBuilder; -use super::word_fst_builder::{PrefixData, PrefixDelta}; -use super::{Deletion, DocumentChange, KvReaderDelAdd, KvReaderFieldId}; -use crate::update::del_add::DelAdd; -use 
crate::update::new::channel::MergerOperation; -use crate::update::new::word_fst_builder::WordFstBuilder; -use crate::update::MergeDeladdCboRoaringBitmaps; -use crate::{ - localized_attributes_rules, CboRoaringBitmapCodec, Error, FieldId, GeoPoint, - GlobalFieldsIdsMap, Index, Result, +use super::extract::{ + merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind, }; - -/// TODO We must return some infos/stats -#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")] -pub fn merge_grenad_entries( - receiver: MergerReceiver, - sender: MergerSender, - rtxn: &RoTxn, - index: &Index, - global_fields_ids_map: GlobalFieldsIdsMap<'_>, -) -> Result { - let mut buffer: Vec = Vec::new(); - let mut documents_ids = index.documents_ids(rtxn)?; - let mut geo_extractor = GeoExtractor::new(rtxn, index)?; - let mut merger_result = MergerResult::default(); - - for merger_operation in receiver { - match merger_operation { - MergerOperation::ExactWordDocidsMerger(merger) => { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); - let _entered = span.enter(); - merge_and_send_docids( - merger, - /// TODO do a MergerOperation::database(&Index) -> Database. 
- index.exact_word_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::FidWordCountDocidsMerger(merger) => { - let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); - let _entered = span.enter(); - merge_and_send_docids( - merger, - index.field_id_word_count_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::WordDocidsMerger(merger) => { - let words_fst = index.words_fst(rtxn)?; - let mut word_fst_builder = WordFstBuilder::new(&words_fst)?; - let prefix_settings = index.prefix_settings(rtxn)?; - word_fst_builder.with_prefix_settings(prefix_settings); - - { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); - let _entered = span.enter(); - - merge_and_send_docids( - merger, - index.word_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |deladd, key| word_fst_builder.register_word(deladd, key), - )?; - } - - { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "words_fst"); - let _entered = span.enter(); - - let (word_fst_mmap, prefix_data) = word_fst_builder.build(index, rtxn)?; - sender.main().write_words_fst(word_fst_mmap).unwrap(); - if let Some(PrefixData { prefixes_fst_mmap, prefix_delta }) = prefix_data { - sender.main().write_words_prefixes_fst(prefixes_fst_mmap).unwrap(); - merger_result.prefix_delta = Some(prefix_delta); - } - } - } - MergerOperation::WordFidDocidsMerger(merger) => { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); - let _entered = span.enter(); - merge_and_send_docids( - merger, - index.word_fid_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::WordPairProximityDocidsMerger(merger) => { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids"); 
- let _entered = span.enter(); - merge_and_send_docids( - merger, - index.word_pair_proximity_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::WordPositionDocidsMerger(merger) => { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); - let _entered = span.enter(); - merge_and_send_docids( - merger, - index.word_position_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::InsertDocument { docid, external_id, document } => { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "insert_document"); - let _entered = span.enter(); - documents_ids.insert(docid); - sender.documents().uncompressed(docid, external_id.clone(), &document).unwrap(); - - if let Some(geo_extractor) = geo_extractor.as_mut() { - let current = index.documents.remap_data_type::().get(rtxn, &docid)?; - let current: Option<&KvReaderFieldId> = current.map(Into::into); - let change = match current { - Some(current) => DocumentChange::Update(todo!()), - None => DocumentChange::Insertion(todo!()), - }; - geo_extractor.manage_change(&mut global_fields_ids_map, &change)?; - } - } - MergerOperation::DeleteDocument { docid, external_id } => { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "delete_document"); - let _entered = span.enter(); - if !documents_ids.remove(docid) { - unreachable!("Tried deleting a document that we do not know about"); - } - sender.documents().delete(docid, external_id.clone()).unwrap(); - - if let Some(geo_extractor) = geo_extractor.as_mut() { - let change = DocumentChange::Deletion(Deletion::create(docid, todo!())); - geo_extractor.manage_change(&mut global_fields_ids_map, &change)?; - } - } - MergerOperation::FinishedDocument => { - // send the rtree - } - MergerOperation::FacetDocidsMerger(merger) => { - let span = - tracing::trace_span!(target: 
"indexing::documents::merge", "facet_docids"); - let _entered = span.enter(); - let mut facet_field_ids_delta = FacetFieldIdsDelta::new(); - let localized_attributes_rules = - index.localized_attributes_rules(rtxn)?.unwrap_or_default(); - let mut facet_search_builder = FacetSearchBuilder::new( - global_fields_ids_map.clone(), - localized_attributes_rules, - ); - merge_and_send_facet_docids( - merger, - FacetDatabases::new(index), - rtxn, - &mut buffer, - sender.facet_docids(), - &mut facet_field_ids_delta, - &mut facet_search_builder, - )?; - - merger_result.facet_field_ids_delta = Some(facet_field_ids_delta); - // merge and send the facet fst and the searchable facet values - facet_search_builder.merge_and_send(index, rtxn, sender.facet_searchable())?; - } - } - } - - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "documents_ids"); - let _entered = span.enter(); - - // Send the documents ids unionized with the current one - sender.send_documents_ids(documents_ids).unwrap(); - } - - // ... 
- - Ok(merger_result) -} - -#[derive(Default, Debug)] -pub struct MergerResult { - /// The delta of the prefixes - pub prefix_delta: Option, - /// The field ids that have been modified - pub facet_field_ids_delta: Option, -} +use super::facet_search_builder::FacetSearchBuilder; +use super::DocumentChange; +use crate::update::del_add::DelAdd; +use crate::{CboRoaringBitmapCodec, Error, FieldId, GeoPoint, GlobalFieldsIdsMap, Index, Result}; pub struct GeoExtractor { rtree: Option>, @@ -267,80 +65,92 @@ impl GeoExtractor { } #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] -fn merge_and_send_docids( - merger: Merger, +pub fn merge_and_send_docids<'extractor>( + mut caches: Vec>, database: Database, - rtxn: &RoTxn<'_>, - buffer: &mut Vec, - docids_sender: impl DocidsSender, - mut register_key: impl FnMut(DelAdd, &[u8]) -> Result<()>, + index: &Index, + docids_sender: impl DocidsSender + Sync, ) -> Result<()> { - let mut merger_iter = merger.into_stream_merger_iter().unwrap(); - while let Some((key, deladd)) = merger_iter.next().unwrap() { - let current = database.get(rtxn, key)?; - let deladd: &KvReaderDelAdd = deladd.into(); - let del = deladd.get(DelAdd::Deletion); - let add = deladd.get(DelAdd::Addition); - - match merge_cbo_bitmaps(current, del, add)? { - Operation::Write(bitmap) => { - let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer); - docids_sender.write(key, value).unwrap(); - register_key(DelAdd::Addition, key)?; + transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| { + let rtxn = index.read_txn()?; + let mut buffer = Vec::new(); + merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| { + let current = database.get(&rtxn, key)?; + match merge_cbo_bitmaps(current, del, add)? 
{ + Operation::Write(bitmap) => { + let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer); + docids_sender.write(key, value).unwrap(); + Ok(()) + } + Operation::Delete => { + docids_sender.delete(key).unwrap(); + Ok(()) + } + Operation::Ignore => Ok(()), } - Operation::Delete => { - docids_sender.delete(key).unwrap(); - register_key(DelAdd::Deletion, key)?; - } - Operation::Ignore => (), - } - } - - Ok(()) + }) + }) } #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] -fn merge_and_send_facet_docids( - merger: Merger, +pub fn merge_and_send_facet_docids<'indexer, 'extractor>( + global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, + mut caches: Vec>, database: FacetDatabases, - rtxn: &RoTxn<'_>, - buffer: &mut Vec, - docids_sender: impl DocidsSender, - facet_field_ids_delta: &mut FacetFieldIdsDelta, - facet_search_builder: &mut FacetSearchBuilder, -) -> Result<()> { - let mut merger_iter = merger.into_stream_merger_iter().unwrap(); - while let Some((key, deladd)) = merger_iter.next().unwrap() { - let current = database.get_cbo_roaring_bytes_value(rtxn, key)?; - let deladd: &KvReaderDelAdd = deladd.into(); - let del = deladd.get(DelAdd::Deletion); - let add = deladd.get(DelAdd::Addition); + index: &Index, + docids_sender: impl DocidsSender + Sync, +) -> Result<(FacetFieldIdsDelta, FacetSearchBuilder<'indexer>)> { + transpose_and_freeze_caches(&mut caches)? + .into_par_iter() + .map(|frozen| { + let mut facet_field_ids_delta = FacetFieldIdsDelta::default(); + let rtxn = index.read_txn()?; + let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?; + let mut facet_search_builder = FacetSearchBuilder::new( + global_fields_ids_map.clone(), + localized_attributes_rules.unwrap_or_default(), + ); + let mut buffer = Vec::new(); + merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| { + let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?; + match merge_cbo_bitmaps(current, del, add)? 
{ + Operation::Write(bitmap) => { + facet_field_ids_delta.register_from_key(key); + facet_search_builder.register_from_key(DelAdd::Addition, key)?; + let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer); + docids_sender.write(key, value).unwrap(); + Ok(()) + } + Operation::Delete => { + facet_field_ids_delta.register_from_key(key); + facet_search_builder.register_from_key(DelAdd::Deletion, key)?; + docids_sender.delete(key).unwrap(); + Ok(()) + } + Operation::Ignore => Ok(()), + } + })?; - match merge_cbo_bitmaps(current, del, add)? { - Operation::Write(bitmap) => { - facet_field_ids_delta.register_from_key(key); - facet_search_builder.register_from_key(DelAdd::Addition, key)?; - let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer); - docids_sender.write(key, value).unwrap(); - } - Operation::Delete => { - facet_field_ids_delta.register_from_key(key); - facet_search_builder.register_from_key(DelAdd::Deletion, key)?; - docids_sender.delete(key).unwrap(); - } - Operation::Ignore => (), - } - } - - Ok(()) + Ok((facet_field_ids_delta, facet_search_builder)) + }) + .reduce( + || Ok((FacetFieldIdsDelta::default(), todo!())), + |lhs, rhs| { + let (lhs_ffid, lhs_fsb) = lhs?; + let (rhs_ffid, rhs_fsb) = rhs?; + let ffid_merged = lhs_ffid.merge(rhs_ffid); + let fsb_merged = todo!(); + Ok((ffid_merged, fsb_merged)) + }, + ) } -struct FacetDatabases<'a> { +pub struct FacetDatabases<'a> { index: &'a Index, } impl<'a> FacetDatabases<'a> { - fn new(index: &'a Index) -> Self { + pub fn new(index: &'a Index) -> Self { Self { index } } @@ -361,7 +171,7 @@ impl<'a> FacetDatabases<'a> { } } -#[derive(Debug)] +#[derive(Debug, Default)] pub struct FacetFieldIdsDelta { /// The field ids that have been modified modified_facet_string_ids: HashSet, @@ -369,13 +179,6 @@ pub struct FacetFieldIdsDelta { } impl FacetFieldIdsDelta { - fn new() -> Self { - Self { - modified_facet_string_ids: HashSet::new(), - modified_facet_number_ids: HashSet::new(), - } - } - fn 
register_facet_string_id(&mut self, field_id: FieldId) { self.modified_facet_string_ids.insert(field_id); } @@ -414,6 +217,17 @@ impl FacetFieldIdsDelta { Some(self.modified_facet_number_ids.iter().copied().collect()) } } + + pub fn merge(mut self, rhs: Self) -> Self { + let Self { modified_facet_number_ids, modified_facet_string_ids } = rhs; + modified_facet_number_ids.into_iter().for_each(|fid| { + self.modified_facet_number_ids.insert(fid); + }); + modified_facet_string_ids.into_iter().for_each(|fid| { + self.modified_facet_string_ids.insert(fid); + }); + self + } } enum Operation { @@ -425,13 +239,10 @@ enum Operation { /// A function that merges the DelAdd CboRoaringBitmaps with the current bitmap. fn merge_cbo_bitmaps( current: Option<&[u8]>, - del: Option<&[u8]>, - add: Option<&[u8]>, + del: Option, + add: Option, ) -> Result { let current = current.map(CboRoaringBitmapCodec::deserialize_from).transpose()?; - let del = del.map(CboRoaringBitmapCodec::deserialize_from).transpose()?; - let add = add.map(CboRoaringBitmapCodec::deserialize_from).transpose()?; - match (current, del, add) { (None, None, None) => Ok(Operation::Ignore), // but it's strange (None, None, Some(add)) => Ok(Operation::Write(add)), diff --git a/milli/src/update/new/mod.rs b/milli/src/update/new/mod.rs index 16a6dd092..ee41bc0fd 100644 --- a/milli/src/update/new/mod.rs +++ b/milli/src/update/new/mod.rs @@ -1,4 +1,7 @@ pub use document_change::{Deletion, DocumentChange, Insertion, Update}; +pub use merger::{ + merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases, FacetFieldIdsDelta, +}; pub use top_level_map::{CowStr, TopLevelMap}; use super::del_add::DelAdd; diff --git a/milli/src/update/new/parallel_iterator_ext.rs b/milli/src/update/new/parallel_iterator_ext.rs index 043457cfd..ff69d7acf 100644 --- a/milli/src/update/new/parallel_iterator_ext.rs +++ b/milli/src/update/new/parallel_iterator_ext.rs @@ -1,38 +1,8 @@ use std::sync::Arc; -use rayon::iter::{MapInit, 
ParallelIterator}; +use rayon::iter::ParallelIterator; pub trait ParallelIteratorExt: ParallelIterator { - /// Maps items based on the init function. - /// - /// The init function is ran only as necessary which is basically once by thread. - fn try_map_try_init( - self, - init: INIT, - map_op: F, - ) -> MapInit< - Self, - impl Fn() -> Result> + Sync + Send + Clone, - impl Fn(&mut Result>, Self::Item) -> Result> + Sync + Send + Clone, - > - where - E: Send + Sync, - F: Fn(&mut T, Self::Item) -> Result + Sync + Send + Clone, - INIT: Fn() -> Result + Sync + Send + Clone, - R: Send, - { - self.map_init( - move || match init() { - Ok(t) => Ok(t), - Err(err) => Err(Arc::new(err)), - }, - move |result, item| match result { - Ok(t) => map_op(t, item).map_err(Arc::new), - Err(err) => Err(err.clone()), - }, - ) - } - /// A method to run a closure of all the items and return an owned error. /// /// The init function is ran only as necessary which is basically once by thread. @@ -58,17 +28,6 @@ pub trait ParallelIteratorExt: ParallelIterator { Err(err) => Err(Arc::into_inner(err).expect("the error must be only owned by us")), } } - - fn try_arc_for_each(self, op: F) -> Result<(), E> - where - E: Send + Sync, - F: Fn(Self::Item) -> Result<(), Arc> + Sync + Send + Clone, - { - match self.try_for_each(op) { - Ok(()) => Ok(()), - Err(err) => Err(Arc::into_inner(err).expect("the error must be only owned by us")), - } - } } impl ParallelIteratorExt for T {} diff --git a/milli/src/update/new/words_prefix_docids.rs b/milli/src/update/new/words_prefix_docids.rs index 38c2b1744..edc09c5f3 100644 --- a/milli/src/update/new/words_prefix_docids.rs +++ b/milli/src/update/new/words_prefix_docids.rs @@ -1,10 +1,16 @@ +use std::cell::RefCell; use std::collections::HashSet; +use std::io::{BufReader, BufWriter, Read, Seek, Write}; use hashbrown::HashMap; use heed::types::Bytes; -use heed::{BytesDecode, Database, RwTxn}; -use roaring::RoaringBitmap; +use heed::{BytesDecode, Database, RoTxn, 
RwTxn}; +use rayon::iter::{IntoParallelIterator, ParallelIterator as _}; +use roaring::MultiOps; +use tempfile::tempfile; +use thread_local::ThreadLocal; +use super::indexer::document_changes::RefCellExt; use crate::heed_codec::StrBEU16Codec; use crate::{CboRoaringBitmapCodec, Index, Prefix, Result}; @@ -38,22 +44,103 @@ impl WordPrefixDocids { prefixes: &HashSet, ) -> Result<()> { // We fetch the docids associated to the newly added word prefix fst only. - let mut docids = RoaringBitmap::new(); - for prefix in prefixes { - docids.clear(); - let prefix = prefix.as_bytes(); - for result in self.database.prefix_iter(wtxn, prefix)? { - let (_word, data) = result?; - docids |= &data; - } + // And collect the CboRoaringBitmaps pointers in an HashMap. + let frozen = FrozenPrefixBitmaps::from_prefixes(self.database, wtxn, prefixes)?; - self.prefix_database.put(wtxn, prefix, &docids)?; + // We access this HashMap in parallel to compute the *union* of all + // of them and *serialize* them into files. There is one file by CPU. + let local_entries = ThreadLocal::with_capacity(rayon::current_num_threads()); + prefixes.into_par_iter().map(AsRef::as_ref).try_for_each(|prefix| { + let refcell = local_entries.get_or_try(|| { + tempfile().map(BufWriter::new).map(|f| RefCell::new((Vec::new(), f, Vec::new()))) + })?; + + let mut refmut = refcell.borrow_mut_or_yield(); + let (ref mut index, ref mut file, ref mut buffer) = *refmut; + + let output = frozen + .bitmaps(prefix) + .unwrap() + .iter() + .map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes)) + .union()?; + + buffer.clear(); + CboRoaringBitmapCodec::serialize_into(&output, buffer); + index.push(PrefixEntry { prefix, serialized_length: buffer.len() }); + file.write_all(buffer) + })?; + + drop(frozen); + + // We iterate over all the collected and serialized bitmaps through + // the files and entries to eventually put them in the final database. 
+ for refcell in local_entries { + let (index, file, mut buffer) = refcell.into_inner(); + let mut file = file.into_inner().map_err(|e| e.into_error())?; + file.rewind()?; + let mut file = BufReader::new(file); + for PrefixEntry { prefix, serialized_length } in index { + buffer.resize(serialized_length, 0); + file.read_exact(&mut buffer)?; + self.prefix_database.remap_data_type::().put( + wtxn, + prefix.as_bytes(), + &buffer, + )?; + } } Ok(()) } } +/// Represents a prefix and the lenght the bitmap takes on disk. +struct PrefixEntry<'a> { + prefix: &'a str, + serialized_length: usize, +} + +/// Stores prefixes along with all the pointers to the associated +/// CBoRoaringBitmaps. +/// +/// They are collected synchronously and stored into an HashMap. The +/// Synchronous process is doing a small amount of work by just storing +/// pointers. It can then be accessed in parallel to get the associated +/// bitmaps pointers. +struct FrozenPrefixBitmaps<'a, 'rtxn> { + prefixes_bitmaps: HashMap<&'a str, Vec<&'rtxn [u8]>>, +} + +impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> { + #[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")] + pub fn from_prefixes( + database: Database, + rtxn: &'rtxn RoTxn, + prefixes: &'a HashSet, + ) -> heed::Result { + let database = database.remap_data_type::(); + + let mut prefixes_bitmaps = HashMap::new(); + for prefix in prefixes { + let mut bitmap_bytes = Vec::new(); + for result in database.prefix_iter(rtxn, prefix.as_bytes())? { + let (_word, bytes) = result?; + bitmap_bytes.push(bytes); + } + assert!(prefixes_bitmaps.insert(prefix.as_str(), bitmap_bytes).is_none()); + } + + Ok(Self { prefixes_bitmaps }) + } + + pub fn bitmaps(&self, key: &str) -> Option<&[&'rtxn [u8]]> { + self.prefixes_bitmaps.get(key).map(AsRef::as_ref) + } +} + +unsafe impl<'a, 'rtxn> Sync for FrozenPrefixBitmaps<'a, 'rtxn> {} + struct WordPrefixIntegerDocids { database: Database, prefix_database: Database,