From 288a879411542fa4f74d56dae587f1abd9898218 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 14 Mar 2022 17:00:00 +0100 Subject: [PATCH 1/4] Remove three useless dependencies --- http-ui/Cargo.toml | 1 - http-ui/src/main.rs | 2 +- milli/Cargo.toml | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/http-ui/Cargo.toml b/http-ui/Cargo.toml index 34b302f03..d8c1775f5 100644 --- a/http-ui/Cargo.toml +++ b/http-ui/Cargo.toml @@ -10,7 +10,6 @@ anyhow = "1.0.38" byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } crossbeam-channel = "0.5.0" heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1" } -meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.7" } memmap2 = "0.5.0" milli = { path = "../milli" } once_cell = "1.5.2" diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index ebfe4b073..b608e79ec 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -18,8 +18,8 @@ use either::Either; use flate2::read::GzDecoder; use futures::{stream, FutureExt, StreamExt}; use heed::EnvOpenOptions; -use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use milli::documents::DocumentBatchReader; +use milli::tokenizer::{Analyzer, AnalyzerConfig}; use milli::update::UpdateIndexingStep::*; use milli::update::{ ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 43123d53b..107674db1 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -12,15 +12,12 @@ byteorder = "1.4.2" concat-arrays = "0.1.2" crossbeam-channel = "0.5.1" either = "1.6.1" -flate2 = "1.0.20" fst = "0.4.5" fxhash = "0.2.1" grenad = { version = "0.4.1", default-features = false, features = ["tempfile"] } geoutils = "0.4.1" heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] } -human_format = "1.0.3" levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } -linked-hash-map = "0.5.4" meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.7" } memmap2 = "0.5.0" obkv = "0.2.0" From 63682c2c9a9c0b01a54db7e84b320cd2818a6178 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 14 Mar 2022 17:00:53 +0100 Subject: [PATCH 2/4] Upgrade the dependencies --- benchmarks/Cargo.toml | 16 +++++++-------- cli/Cargo.toml | 14 ++++++------- filter-parser/Cargo.toml | 2 +- helpers/Cargo.toml | 6 +++--- http-ui/Cargo.toml | 42 +++++++++++++++++++------------------- infos/Cargo.toml | 12 +++++------ milli/Cargo.toml | 44 ++++++++++++++++++++-------------------- 7 files changed, 68 insertions(+), 68 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index b48599679..0cac5e017 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -6,8 +6,8 @@ publish = false [dependencies] milli = { path = "../milli" } -anyhow = "1.0" -serde_json = { version = "1.0.62", features = ["preserve_order"] } +anyhow = "1.0.56" +serde_json = { version = "1.0.79", features = ["preserve_order"] } csv = "1.1.6" [target.'cfg(target_os = "linux")'.dependencies] @@ -15,14 +15,14 @@ jemallocator = "0.3.2" [dev-dependencies] heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1" } -criterion = { version = "0.3.4", features = ["html_reports"] } +criterion = { version = "0.3.5", features = ["html_reports"] } [build-dependencies] -anyhow = "1.0" -bytes = "1.0" -flate2 = "1.0.20" -convert_case = "0.4" -reqwest = { version = "0.11.3", features = ["blocking", "rustls-tls"], default-features = false } +anyhow = "1.0.56" +bytes = "1.1.0" +flate2 = "1.0.22" +convert_case = "0.5.0" +reqwest = { version = "0.11.9", features = ["blocking", "rustls-tls"], default-features = false } [[bench]] name = "search_songs" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 023cd06f6..4378902ca 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -8,15 +8,15 @@ description = "A CLI to interact with a milli index" [dependencies] indicatif = "0.16.2" -serde = "1.0.129" -serde_json = "1.0.66" -structopt = "0.3.22" +serde = "1.0.136" +serde_json = "1.0.79" +structopt = "0.3.26" milli = { path = "../milli" } -eyre = "0.6.5" -color-eyre = "0.5.11" +eyre = "0.6.7" +color-eyre = "0.6.1" heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] } -byte-unit = { version = "4.0.12", features = ["serde"] } -bimap = "0.6.1" +byte-unit = { version = "4.0.14", features = ["serde"] } +bimap = "0.6.2" csv = "1.1.6" stderrlog = "0.5.1" diff --git a/filter-parser/Cargo.toml b/filter-parser/Cargo.toml index ee44bcb7f..ea29404ed 100644 --- a/filter-parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -6,5 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -nom = "7.0.0" +nom = "7.1.0" nom_locate = "4.0.0" diff --git a/helpers/Cargo.toml b/helpers/Cargo.toml index dc0f7dc81..482750636 100644 --- a/helpers/Cargo.toml +++ b/helpers/Cargo.toml @@ -5,12 +5,12 @@ authors = ["Clément Renault "] edition = "2018" [dependencies] -anyhow = "1.0.38" -byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } +anyhow = "1.0.56" +byte-unit = { version = "4.0.14", default-features = false, features = ["std"] } heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1" } milli = { path = "../milli" } stderrlog = "0.5.1" -structopt = { version = "0.3.21", default-features = false } +structopt = { version = "0.3.26", default-features = false } [target.'cfg(target_os = "linux")'.dependencies] jemallocator = "0.3.2" diff --git a/http-ui/Cargo.toml b/http-ui/Cargo.toml index d8c1775f5..9dd269970 100644 --- a/http-ui/Cargo.toml +++ b/http-ui/Cargo.toml @@ -6,42 +6,42 @@ authors = ["Clément Renault "] edition = "2018" [dependencies] -anyhow = "1.0.38" -byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } -crossbeam-channel = "0.5.0" +anyhow = "1.0.56" +byte-unit = { version = "4.0.14", default-features = false, features = ["std"] } +crossbeam-channel = "0.5.2" heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1" } -memmap2 = "0.5.0" +memmap2 = "0.5.3" milli = { path = "../milli" } -once_cell = "1.5.2" -rayon = "1.5.0" -structopt = { version = "0.3.21", default-features = false, features = ["wrap_help"] } -tempfile = "3.2.0" +once_cell = "1.10.0" +rayon = "1.5.1" +structopt = { version = "0.3.26", default-features = false, features = ["wrap_help"] } +tempfile = "3.3.0" # http server -askama = "0.10.5" -askama_warp = "0.10.0" -bytes = "0.5.6" +askama = "0.11.1" +askama_warp = "0.12.0" +bytes = "1.1.0" either = "1.6.1" -flate2 = "1.0.20" -futures = "0.3.12" -serde = { version = "1.0.123", features = ["derive"] } -serde_json = { version = "1.0.62", features = ["preserve_order"] } -tokio = { version = "0.2.25", features = ["full"] } -warp = "0.2.5" +flate2 = "1.0.22" +futures = "0.3.21" +serde = { version = "1.0.136", features = ["derive"] } +serde_json = { version = "1.0.79", features = ["preserve_order"] } +tokio = { version = "1.17.0", features = ["full"] } +warp = "0.3.2" # logging log = "0.4.14" stderrlog = "0.5.1" -fst = "0.4.5" +fst = "0.4.7" # Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105) -funty = "=1.1" -bimap = "0.6.1" +funty = "2.0.0" +bimap = "0.6.2" csv = "1.1.6" [dev-dependencies] maplit = "1.0.2" -serde_test = "1.0.125" +serde_test = "1.0.136" [target.'cfg(target_os = "linux")'.dependencies] jemallocator = "0.3.2" diff --git a/infos/Cargo.toml b/infos/Cargo.toml index 41c9241ba..2863695f0 100644 --- a/infos/Cargo.toml +++ b/infos/Cargo.toml @@ -5,15 +5,15 @@ authors = ["Clément Renault "] edition = "2018" [dependencies] -anyhow = "1.0.38" -byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } -csv = "1.1.5" +anyhow = "1.0.56" +byte-unit = { version = "4.0.14", default-features = false, features = ["std"] } +csv = "1.1.6" heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1" } milli = { path = "../milli" } -roaring = "0.6.6" -serde_json = "1.0.62" +roaring = "0.9.0" +serde_json = "1.0.79" stderrlog = "0.5.1" -structopt = { version = "0.3.21", default-features = false } +structopt = { version = "0.3.26", default-features = false } [target.'cfg(target_os = "linux")'.dependencies] jemallocator = "0.3.2" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 107674db1..ef89e7819 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -5,50 +5,50 @@ authors = ["Kerollmops "] edition = "2018" [dependencies] -bimap = { version = "0.6.1", features = ["serde"] } +bimap = { version = "0.6.2", features = ["serde"] } bincode = "1.3.3" -bstr = "0.2.15" -byteorder = "1.4.2" +bstr = "0.2.17" +byteorder = "1.4.3" concat-arrays = "0.1.2" -crossbeam-channel = "0.5.1" +crossbeam-channel = "0.5.2" either = "1.6.1" -fst = "0.4.5" +fst = "0.4.7" fxhash = "0.2.1" grenad = { version = "0.4.1", default-features = false, features = ["tempfile"] } geoutils = "0.4.1" heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] } -levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } -meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.7" } -memmap2 = "0.5.0" +levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } +meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.8" } +memmap2 = "0.5.3" obkv = "0.2.0" -once_cell = "1.5.2" -ordered-float = "2.1.1" -rayon = "1.5.0" -roaring = "0.6.6" -rstar = { version = "0.9.1", features = ["serde"] } -serde = { version = "1.0.123", features = ["derive"] } -serde_json = { version = "1.0.62", features = ["preserve_order"] } -slice-group-by = "0.2.6" -smallstr = { version = "0.2.0", features = ["serde"] } -smallvec = "1.6.1" -tempfile = "3.2.0" +once_cell = "1.10.0" +ordered-float = "2.10.0" +rayon = "1.5.1" +roaring = "0.9.0" +rstar = { version = "0.9.2", features = ["serde"] } +serde = { version = "1.0.136", features = ["derive"] } +serde_json = { version = "1.0.79", features = ["preserve_order"] } +slice-group-by = "0.3.0" +smallstr = { version = "0.3.0", features = ["serde"] } +smallvec = "1.8.0" +tempfile = "3.3.0" time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } uuid = { version = "0.8.2", features = ["v4"] } filter-parser = { path = "../filter-parser" } # documents words self-join -itertools = "0.10.0" +itertools = "0.10.3" # logging log = "0.4.14" -logging_timer = "1.0.0" +logging_timer = "1.1.0" csv = "1.1.6" [dev-dependencies] big_s = "1.0.2" maplit = "1.0.2" -rand = "0.8.3" +rand = "0.8.5" [features] default = [] From 21ec334dcc056847a5f2bbcb799946bc5c298706 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 14 Mar 2022 17:13:07 +0100 Subject: [PATCH 3/4] Fix the compilation error of the dependency versions --- http-ui/Cargo.toml | 1 + http-ui/src/main.rs | 9 ++++--- .../cbo_roaring_bitmap_codec.rs | 25 ++++++++++--------- milli/src/search/criteria/mod.rs | 6 +++-- milli/src/search/query_tree.rs | 3 +-- milli/src/update/delete_documents.rs | 6 ++--- 6 files changed, 27 insertions(+), 23 deletions(-) diff --git a/http-ui/Cargo.toml b/http-ui/Cargo.toml index 9dd269970..e7ed8455a 100644 --- a/http-ui/Cargo.toml +++ b/http-ui/Cargo.toml @@ -27,6 +27,7 @@ futures = "0.3.21" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.79", features = ["preserve_order"] } tokio = { version = "1.17.0", features = ["full"] } +tokio-stream = { version = "0.1.8", default-features = false, features = ["sync"] } warp = "0.3.2" # logging diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index b608e79ec..26c1034eb 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -3,7 +3,7 @@ mod update_store; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fmt::Display; use std::fs::{create_dir_all, File}; -use std::io::{BufRead, BufReader, Cursor}; +use std::io::{BufRead, BufReader, Cursor, Read}; use std::net::SocketAddr; use std::num::{NonZeroU32, NonZeroUsize}; use std::path::PathBuf; @@ -35,6 +35,7 @@ use structopt::StructOpt; use tokio::fs::File as TFile; use tokio::io::AsyncWriteExt; use tokio::sync::broadcast; +use tokio_stream::wrappers::BroadcastStream; use warp::filters::ws::Message; use warp::http::Response; use warp::Filter; @@ -885,7 +886,8 @@ async fn main() -> anyhow::Result<()> { let mut file = TFile::from_std(file); while let Some(result) = stream.next().await { - let bytes = result.unwrap().to_bytes(); + let mut bytes = Vec::new(); + result.unwrap().reader().read_to_end(&mut bytes).unwrap(); file.write_all(&bytes[..]).await.unwrap(); } @@ -1004,8 +1006,7 @@ async fn main() -> anyhow::Result<()> { let update_status_receiver = update_status_sender.subscribe(); ws.on_upgrade(|websocket| { // Just echo all updates messages... - update_status_receiver - .into_stream() + BroadcastStream::new(update_status_receiver) .flat_map(|result| match result { Ok(status) => { let msg = serde_json::to_string(&status).unwrap(); diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index 519997274..96aee6855 100644 --- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -82,7 +82,8 @@ impl CboRoaringBitmapCodec { buffer.extend_from_slice(&integer.to_ne_bytes()); } } else { - let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()); + // Integers *must* be ordered here, no matter what. + let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()).unwrap(); roaring.serialize_into(buffer)?; } } else { @@ -152,25 +153,25 @@ mod tests { let mut buffer = Vec::new(); let small_data = vec![ - RoaringBitmap::from_sorted_iter(1..4), - RoaringBitmap::from_sorted_iter(2..5), - RoaringBitmap::from_sorted_iter(4..6), - RoaringBitmap::from_sorted_iter(1..3), + RoaringBitmap::from_sorted_iter(1..4).unwrap(), + RoaringBitmap::from_sorted_iter(2..5).unwrap(), + RoaringBitmap::from_sorted_iter(4..6).unwrap(), + RoaringBitmap::from_sorted_iter(1..3).unwrap(), ]; let small_data: Vec<_> = small_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect(); CboRoaringBitmapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap(); let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap(); - let expected = RoaringBitmap::from_sorted_iter(1..6); + let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap(); assert_eq!(bitmap, expected); let medium_data = vec![ - RoaringBitmap::from_sorted_iter(1..4), - RoaringBitmap::from_sorted_iter(2..5), - RoaringBitmap::from_sorted_iter(4..8), - RoaringBitmap::from_sorted_iter(0..3), - RoaringBitmap::from_sorted_iter(7..23), + RoaringBitmap::from_sorted_iter(1..4).unwrap(), + RoaringBitmap::from_sorted_iter(2..5).unwrap(), + RoaringBitmap::from_sorted_iter(4..8).unwrap(), + RoaringBitmap::from_sorted_iter(0..3).unwrap(), + RoaringBitmap::from_sorted_iter(7..23).unwrap(), ]; let medium_data: Vec<_> = @@ -179,7 +180,7 @@ mod tests { CboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap(); let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap(); - let expected = RoaringBitmap::from_sorted_iter(0..23); + let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap(); assert_eq!(bitmap, expected); } } diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 8306f5d0e..1dbfd2524 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -498,6 +498,7 @@ fn query_pair_proximity_docids( #[cfg(test)] pub mod test { use std::collections::HashMap; + use std::iter; use maplit::hashmap; use rand::rngs::StdRng; @@ -567,7 +568,8 @@ pub mod test { .iter() .enumerate() .map(|(i, w)| { - (w.clone(), RoaringBitmap::from_sorted_iter(std::iter::once(i as u32))) + let bitmap = RoaringBitmap::from_sorted_iter(iter::once(i as u32)).unwrap(); + (w.clone(), bitmap) }) .collect()) } else { @@ -622,7 +624,7 @@ pub mod test { } values.sort_unstable(); - RoaringBitmap::from_sorted_iter(values.into_iter()) + RoaringBitmap::from_sorted_iter(values.into_iter()).unwrap() } let word_docids = hashmap! { diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 0744231ae..237bb9be2 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -587,8 +587,7 @@ mod test { values.push(rng.gen()); } values.sort_unstable(); - - RoaringBitmap::from_sorted_iter(values.into_iter()) + RoaringBitmap::from_sorted_iter(values.into_iter()).unwrap() } TestContext { diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 2391bd0e4..402cc61dd 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -186,7 +186,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { // We create the FST map of the external ids that we must delete. external_ids.sort_unstable(); - let external_ids_to_delete = fst::Set::from_iter(external_ids.iter().map(AsRef::as_ref))?; + let external_ids_to_delete = fst::Set::from_iter(external_ids)?; // We acquire the current external documents ids map... let mut new_external_documents_ids = self.index.external_documents_ids(self.wtxn)?; @@ -209,7 +209,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { // the LMDB B-Tree two times but only once. let mut iter = word_docids.prefix_iter_mut(self.wtxn, &word)?; if let Some((key, mut docids)) = iter.next().transpose()? { - if key == word.as_ref() { + if key == word.as_str() { let previous_len = docids.len(); docids -= &self.documents_ids; if docids.is_empty() { @@ -230,7 +230,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { words.iter().filter_map( |(word, must_remove)| { if *must_remove { - Some(word.as_ref()) + Some(word.as_str()) } else { None } From 0c5f4ed7de9a05d456af6245f8990eebcf7e236d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 15 Mar 2022 14:18:29 +0100 Subject: [PATCH 4/4] Apply suggestions Co-authored-by: Many --- milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index 96aee6855..1bd132974 100644 --- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -82,7 +82,7 @@ impl CboRoaringBitmapCodec { buffer.extend_from_slice(&integer.to_ne_bytes()); } } else { - // Integers *must* be ordered here, no matter what. + // We can unwrap safely because the vector is sorted upper. let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()).unwrap(); roaring.serialize_into(buffer)?; }