From d8ba405bafb7a6b28554e7ef006079f2f82364f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 29 Nov 2019 12:12:55 +0100 Subject: [PATCH 1/3] Add some criterion benchmarks to help measure improvements --- .gitignore | 1 + Cargo.lock | 83 +++++++++++++++++ meilisearch-core/Cargo.toml | 5 ++ meilisearch-core/benches/search_benchmark.rs | 95 ++++++++++++++++++++ 4 files changed, 184 insertions(+) create mode 100644 meilisearch-core/benches/search_benchmark.rs diff --git a/.gitignore b/.gitignore index 3ae73d6d8..e1f56a99c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ /target +meilisearch-core/target **/*.csv **/*.json_lines **/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock index 957fb5449..2dedeb04a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -196,6 +196,14 @@ dependencies = [ "ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "cast" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "cc" version = "1.0.47" @@ -284,6 +292,39 @@ dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "criterion" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)", + "cast 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", + "criterion-plot 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.8 
(registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_os 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_xoshiro 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", + "tinytemplate 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "criterion-plot" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cast 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "crossbeam-channel" version = "0.4.0" @@ -761,6 +802,14 @@ dependencies = [ "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "itertools" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "itoa" version = "0.4.4" @@ -888,6 +937,7 @@ dependencies = [ "bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)", + "criterion 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-channel 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "deunicode 1.0.0 
(registry+https://github.com/rust-lang/crates.io-index)", @@ -1440,6 +1490,15 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rand_os" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rand_pcg" version = "0.1.2" @@ -1457,6 +1516,14 @@ dependencies = [ "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rand_xoshiro" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rayon" version = "1.2.0" @@ -2045,6 +2112,15 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "tinytemplate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "tokio" version = "0.1.22" @@ -2565,6 +2641,7 @@ dependencies = [ "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" "checksum bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c" "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" +"checksum cast 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" 
"checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e8493056968583b0193c1bb04d6f7684586f3726992d6c573261941a895dbd68" @@ -2575,6 +2652,8 @@ dependencies = [ "checksum const-random-macro 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c750ec12b83377637110d5a57f5ae08e895b06c4b16e2bdbf1a94ef717428c59" "checksum cookie 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5" "checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" +"checksum criterion 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "938703e165481c8d612ea3479ac8342e5615185db37765162e762ec3523e2fc6" +"checksum criterion-plot 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "eccdc6ce8bbe352ca89025bee672aa6d24f4eb8c53e3a8b5d1bc58011da072a2" "checksum crossbeam-channel 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "acec9a3b0b3559f15aee4f90746c4e5e293b701c0f7d3925d24e01645267b68c" "checksum crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3aa945d63861bfe624b55d153a39684da1e8c0bc8fba932f7ee3a3c16cea3ca" "checksum crossbeam-epoch 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5064ebdbf05ce3cb95e45c8b086f72263f4166b29b97f6baff7ef7fe047b55ac" @@ -2628,6 +2707,7 @@ dependencies = [ "checksum idna 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" "checksum indexmap 1.3.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "712d7b3ea5827fcb9d4fda14bf4da5f136f0db2ae9c8f4bd4e2d1c6fde4e6db2" "checksum iovec 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" +"checksum itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" "checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" "checksum jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45" "checksum jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69" @@ -2695,8 +2775,10 @@ dependencies = [ "checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" "checksum rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" "checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" +"checksum rand_os 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a788ae3edb696cfcba1c19bfd388cc4b8c21f8a408432b199c072825084da58a" "checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" "checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" +"checksum rand_xoshiro 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0e18c91676f670f6f0312764c759405f13afb98d5d73819840cf72a518487bff" 
"checksum rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "83a27732a533a1be0a0035a111fe76db89ad312f6f0347004c220c57f209a123" "checksum rayon-core 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "98dcf634205083b17d0861252431eb2acbfb698ab7478a2d20de07954f47ec7b" "checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" @@ -2761,6 +2843,7 @@ dependencies = [ "checksum tide-querystring 0.1.0 (git+https://github.com/rustasync/tide?rev=e77709370bb24cf776fe6da902467c35131535b1)" = "" "checksum tide-slog 0.1.0 (git+https://github.com/rustasync/tide?rev=e77709370bb24cf776fe6da902467c35131535b1)" = "" "checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" +"checksum tinytemplate 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4574b75faccaacddb9b284faecdf0b544b80b6b294f3d062d325c5726a209c20" "checksum tokio 0.1.22 (registry+https://github.com/rust-lang/crates.io-index)" = "5a09c0b5bb588872ab2f09afa13ee6e9dac11e10a0ec9e8e3ba39a5a5d530af6" "checksum tokio-buf 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fb220f46c53859a4b7ec083e41dec9778ff0b1851c0942b211edb89e0ccdc46" "checksum tokio-current-thread 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "d16217cad7f1b840c5a97dfb3c43b0c871fef423a6e8d2118c604e843662a443" diff --git a/meilisearch-core/Cargo.toml b/meilisearch-core/Cargo.toml index 0da585623..a268c6605 100644 --- a/meilisearch-core/Cargo.toml +++ b/meilisearch-core/Cargo.toml @@ -32,6 +32,7 @@ zerocopy = "0.2.8" [dev-dependencies] assert_matches = "1.3" +criterion = "0.3" csv = "1.0.7" indexmap = { version = "1.2.0", features = ["serde-1"] } rustyline = { version = "5.0.0", default-features = false } @@ -39,3 +40,7 @@ structopt = "0.3.2" tempfile = "3.1.0" termcolor = "1.0.4" toml = "0.5.3" + 
+[[bench]] +name = "search_benchmark" +harness = false diff --git a/meilisearch-core/benches/search_benchmark.rs b/meilisearch-core/benches/search_benchmark.rs new file mode 100644 index 000000000..506bc8950 --- /dev/null +++ b/meilisearch-core/benches/search_benchmark.rs @@ -0,0 +1,95 @@ +#[cfg(test)] +#[macro_use] +extern crate assert_matches; + +use std::sync::mpsc; +use std::path::Path; +use std::fs; +use std::iter; + +use meilisearch_core::Database; +use meilisearch_core::{ProcessedUpdateResult, UpdateStatus}; +use serde_json::Value; + +use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId}; + +fn prepare_database(path: &Path) -> Database { + let database = Database::open_or_create(path).unwrap(); + let db = &database; + + let (sender, receiver) = mpsc::sync_channel(100); + let update_fn = move |_name: &str, update: ProcessedUpdateResult| { + sender.send(update.update_id).unwrap() + }; + let index = database.create_index("bench").unwrap(); + + database.set_update_callback(Box::new(update_fn)); + + let schema = { + let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../datasets/movies/schema.toml"); + let string = fs::read_to_string(path).expect("find schema"); + toml::from_str(&string).unwrap() + }; + + let mut update_writer = db.update_write_txn().unwrap(); + let _update_id = index.schema_update(&mut update_writer, schema).unwrap(); + update_writer.commit().unwrap(); + + let mut additions = index.documents_addition(); + + let json: Value = { + let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../datasets/movies/movies.json"); + let movies_file = fs::File::open(path).expect("find movies"); + serde_json::from_reader(movies_file).unwrap() + }; + + let documents = json.as_array().unwrap(); + + for document in documents { + additions.update_document(document); + } + + let mut update_writer = db.update_write_txn().unwrap(); + let update_id = additions.finalize(&mut update_writer).unwrap(); + update_writer.commit().unwrap(); + + // block until the 
transaction is processed + let _ = receiver.into_iter().find(|id| *id == update_id); + + let update_reader = db.update_read_txn().unwrap(); + let result = index.update_status(&update_reader, update_id).unwrap(); + assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none()); + + database +} + +pub fn criterion_benchmark(c: &mut Criterion) { + let dir = tempfile::tempdir().unwrap(); + let database = prepare_database(dir.path()); + + let reader = database.main_read_txn().unwrap(); + let index = database.open_index("bench").unwrap(); + + let mut count = 0; + let query = "I love paris "; + + let iter = iter::from_fn(|| { + count += 1; + query.get(0..count) + }); + + let mut group = c.benchmark_group("searching in movies"); + group.sample_size(10); + + for query in iter { + let bench_name = BenchmarkId::new("query", format!("{:?}", query)); + group.bench_with_input(bench_name, &query, |b, query| b.iter(|| { + let builder = index.query_builder(); + builder.query(&reader, query, 0..20).unwrap(); + })); + } + group.finish(); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); From 951f0bcb102891b9dfad0fdd63c3cd1d112d2a87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 29 Nov 2019 12:28:46 +0100 Subject: [PATCH 2/3] squash-me: Improve benchmarks naming --- meilisearch-core/benches/search_benchmark.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch-core/benches/search_benchmark.rs b/meilisearch-core/benches/search_benchmark.rs index 506bc8950..568442ea0 100644 --- a/meilisearch-core/benches/search_benchmark.rs +++ b/meilisearch-core/benches/search_benchmark.rs @@ -78,11 +78,11 @@ pub fn criterion_benchmark(c: &mut Criterion) { query.get(0..count) }); - let mut group = c.benchmark_group("searching in movies"); + let mut group = c.benchmark_group("searching in movies (19654 docs)"); group.sample_size(10); for query in iter { - let bench_name = 
BenchmarkId::new("query", format!("{:?}", query)); + let bench_name = BenchmarkId::from_parameter(format!("{:?}", query)); group.bench_with_input(bench_name, &query, |b, query| b.iter(|| { let builder = index.query_builder(); builder.query(&reader, query, 0..20).unwrap(); From a8272f0eeffc5ffd742ad524147652654bb7af08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 13 Dec 2019 14:17:07 +0100 Subject: [PATCH 3/3] Add a benchmark github workflow --- .github/workflows/benchmarks.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/workflows/benchmarks.yml diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 000000000..845e11171 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,15 @@ +--- +on: [pull_request] + +name: Benchmark pull requests + +jobs: + runBenchmark: + name: run benchmark + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: run benchmark + uses: matchai/criterion-compare-action@master + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}